Spaces:
Running
Running
| from typing import Dict, Any | |
| import json | |
| from pathlib import Path | |
| from docx import Document | |
| from openpyxl import load_workbook | |
async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
    """
    Auto-fill a form template with extracted data.

    The filler is chosen from the file extension (compared case-insensitively):
    ``.docx`` is handled by ``fill_docx_form``, ``.xlsx``/``.xls`` by
    ``fill_excel_form``, and anything else by ``fill_text_form``. The filled
    copy is written next to the template as ``<stem>_filled<suffix>``.

    Args:
        template_path: Path to form template (DOCX, XLSX or plain text)
        data: Dictionary of field values

    Returns:
        The dict returned by the selected filler, or
        ``{'error': ..., 'success': False}`` if an exception is raised here.
    """
    try:
        template = Path(template_path)
        file_ext = template.suffix.lower()

        # Build the output name from path components instead of str.replace():
        # replace() with the lower-cased suffix misses "FORM.DOCX" (so the
        # template itself would be overwritten), rewrites every occurrence of
        # the suffix anywhere in the path, and for a path without a suffix
        # inserts "_filled" between every character.
        output_path = str(
            template.with_name(f"{template.stem}_filled{template.suffix}")
        )

        if file_ext == '.docx':
            return await fill_docx_form(template_path, data, output_path)
        elif file_ext in ('.xlsx', '.xls'):
            # NOTE(review): openpyxl documents that it cannot read legacy
            # .xls files, so those presumably come back as an error dict from
            # fill_excel_form -- confirm whether .xls should be routed here.
            return await fill_excel_form(template_path, data, output_path)
        else:
            # Generic text replacement
            return await fill_text_form(template_path, data, output_path)
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
def _placeholder_variants(data):
    """Map each accepted spelling of every data key to ``(original_key, value)``.

    Spellings are the key as given, lower-cased, lower-cased with spaces
    replaced by underscores, and lower-cased with spaces removed. When two
    keys produce the same spelling, the later key in ``data`` wins.
    """
    variants = {}
    for key, value in data.items():
        lowered = key.lower()
        for spelling in (key, lowered, lowered.replace(" ", "_"), lowered.replace(" ", "")):
            variants[spelling] = (key, value)
    return variants


def _replace_placeholders(text, variants, ignore_case=False):
    """Replace ``{{name}}`` and ``{name}`` placeholders in *text*.

    Returns ``(new_text, hits)`` where *hits* is a list of
    ``(placeholder, original_key)`` for every placeholder actually replaced.
    """
    import re  # function-scope import, like the file's use of traceback

    hits = []
    for spelling, (original_key, value) in variants.items():
        # Double braces first so "{{x}}" is not half-consumed by "{x}".
        for placeholder in (f"{{{{{spelling}}}}}", f"{{{spelling}}}"):
            if ignore_case:
                pattern = re.compile(re.escape(placeholder), re.IGNORECASE)
                if pattern.search(text) is None:
                    continue
                replacement = str(value)
                # Callable replacement so backslashes in the value stay literal.
                text = pattern.sub(lambda _match, _r=replacement: _r, text)
            else:
                if placeholder not in text:
                    continue
                text = text.replace(placeholder, str(value))
            hits.append((placeholder, original_key))
    return text, hits


async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill a DOCX form template.

    Two strategies are applied:

    1. Placeholder replacement: ``{{key}}`` and ``{key}`` in body paragraphs
       (case-sensitive) and in table cells (case-insensitive), for each
       spelling produced by ``_placeholder_variants``.
    2. Label matching in tables: a cell whose text matches a data key is
       treated as a label. The value is written to the next cell in the row
       when that cell holds fewer than 3 characters, and, for an exact match
       whose label cell contains a colon, after the colon in the label cell.

    Args:
        template_path: Path of the DOCX template to read.
        data: Field name -> value; values are converted with ``str()``.
        output_path: Where the filled document is saved (parent directories
            are created).

    Returns:
        On success a dict with ``success``, ``output_path``, ``fields_filled``
        (original data keys, in the order they were first filled),
        ``total_fields`` and ``debug_info``. On any exception
        ``{'error': ..., 'success': False}``.
    """
    try:
        doc = Document(template_path)
        fields_filled = []
        debug_info = []
        variants = _placeholder_variants(data)

        def mark_filled(field, note=None):
            # Report each ORIGINAL data key once; reporting the normalized
            # spellings as well would inflate total_fields.
            if field not in fields_filled:
                fields_filled.append(field)
                if note is not None:
                    debug_info.append(note)

        # First, replace placeholders in body paragraphs
        for paragraph in doc.paragraphs:
            new_text, hits = _replace_placeholders(paragraph.text, variants)
            if hits:
                # NOTE: per python-docx, assigning .text collapses the
                # paragraph into a single run, so only touch paragraphs that
                # actually changed.
                paragraph.text = new_text
                for _placeholder, field in hits:
                    mark_filled(field)

        # Table processing
        for table in doc.tables:
            for row in table.rows:
                cells = row.cells
                for cell_idx, cell in enumerate(cells):
                    cell_text = cell.text.strip().lower()

                    # Skip empty cells
                    if not cell_text:
                        continue

                    # Placeholders are checked before label matching: a cell
                    # such as "{{name}}" contains the key and would otherwise
                    # be taken for a label, spilling the value into the
                    # neighbouring cell as well.
                    new_text, hits = _replace_placeholders(
                        cell.text, variants, ignore_case=True
                    )
                    if hits:
                        cell.text = new_text
                        for placeholder, field in hits:
                            mark_filled(
                                field,
                                f"Filled placeholder '{placeholder}' with '{field}'",
                            )
                        continue

                    # Loop-invariant for the key loop below.
                    cell_normalized = (
                        cell_text.replace(" ", "").replace(":", "").replace("_", "")
                    )
                    next_cell = cells[cell_idx + 1] if cell_idx + 1 < len(cells) else None

                    for original_key, value in data.items():
                        key_lower = original_key.lower()
                        key_normalized = key_lower.replace(" ", "").replace("_", "")

                        # Method 1: substring match on the raw (lower-cased) text.
                        # An empty key would be "in" every cell, so require content.
                        if key_lower and (key_lower in cell_text or cell_text in key_lower):
                            # Fill the next cell if it exists and is (nearly) empty
                            if next_cell is not None and len(next_cell.text.strip()) < 3:
                                next_cell.text = str(value)
                                mark_filled(
                                    original_key,
                                    f"Filled '{original_key}' in cell after '{cell.text.strip()}'",
                                )

                            # Method 2: fill after the colon in the label cell itself
                            if ":" in cell.text:
                                label = cell.text.split(":", 1)[0].strip()
                                cell.text = f"{label}: {value}"
                                mark_filled(
                                    original_key,
                                    f"Filled '{original_key}' after colon in '{label}'",
                                )

                        # Method 3: fuzzy match on text with spaces/colons/underscores
                        # removed. Both sides must be non-empty: a cell like ":" or
                        # "____" normalizes to "" and "" is a substring of every key.
                        elif (
                            key_normalized
                            and cell_normalized
                            and (
                                key_normalized in cell_normalized
                                or cell_normalized in key_normalized
                            )
                        ):
                            if next_cell is not None and len(next_cell.text.strip()) < 3:
                                next_cell.text = str(value)
                                mark_filled(
                                    original_key,
                                    f"Filled '{original_key}' (fuzzy) after '{cell.text.strip()}'",
                                )

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        doc.save(output_path)

        return {
            'success': True,
            'output_path': output_path,
            # Already unique (see mark_filled) and in first-fill order.
            'fields_filled': fields_filled,
            'total_fields': len(fields_filled),
            'debug_info': debug_info
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def fill_excel_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill an Excel form template by replacing placeholders.

    Every string cell of the workbook's active sheet is scanned for
    ``{{key}}`` and ``{key}`` (one pair per key in ``data``); matches are
    replaced with ``str(value)``. The workbook is then saved to
    ``output_path``, creating parent directories as needed.

    Returns:
        On success a dict with ``success``, ``output_path``, ``fields_filled``
        and ``total_fields``; on any exception
        ``{'error': ..., 'success': False}``.
    """
    try:
        workbook = load_workbook(template_path)
        sheet = workbook.active
        filled_keys = []

        for cells in sheet.iter_rows():
            for current in cells:
                # Only non-empty string cells can hold placeholders.
                if not current.value or not isinstance(current.value, str):
                    continue
                for field, field_value in data.items():
                    for token in (f"{{{{{field}}}}}", f"{{{field}}}"):
                        if token not in current.value:
                            continue
                        current.value = current.value.replace(token, str(field_value))
                        if field not in filled_keys:
                            filled_keys.append(field)

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        workbook.save(output_path)

        unique_keys = set(filled_keys)
        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': list(unique_keys),
            'total_fields': len(unique_keys)
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def fill_text_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill a text-based form by replacing placeholders.

    The template is read as UTF-8. For each key in ``data``, ``{{key}}`` and
    then ``{key}`` are replaced with ``str(value)``. The result is written as
    UTF-8 to ``output_path``, creating parent directories as needed.

    Returns:
        On success a dict with ``success``, ``output_path``, ``fields_filled``
        (keys whose placeholder was found, in ``data`` order) and
        ``total_fields``; on any exception
        ``{'error': ..., 'success': False}``.
    """
    try:
        with open(template_path, 'r', encoding='utf-8') as source:
            text = source.read()

        filled_keys = []
        for field, field_value in data.items():
            found = False
            # Double-brace form first, then the single-brace form.
            for token in (f"{{{{{field}}}}}", f"{{{field}}}"):
                if token not in text:
                    continue
                text = text.replace(token, str(field_value))
                found = True
            if found and field not in filled_keys:
                filled_keys.append(field)

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as target:
            target.write(text)

        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': filled_keys,
            'total_fields': len(filled_keys)
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def generate_form_template(
    fields: list,
    output_path: str = "data/outputs/form_template.docx",
) -> dict:
    """Generate a blank DOCX form template with the specified fields.

    The document gets a 'Form Template' heading followed by one paragraph per
    field: a bold ``<field>: `` label and a ``{{<field>}}`` placeholder.

    Args:
        fields: Field names, in the order they should appear.
        output_path: Where to save the template. Defaults to the previously
            hard-coded ``data/outputs/form_template.docx``; parent
            directories are created.

    Returns:
        On success ``{'success': True, 'output_path': ..., 'fields': fields}``;
        on any exception ``{'error': ..., 'success': False}``.
    """
    try:
        doc = Document()
        doc.add_heading('Form Template', 0)

        for field in fields:
            p = doc.add_paragraph()
            p.add_run(f"{field}: ").bold = True
            p.add_run(f"{{{{{field}}}}}")

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        doc.save(output_path)

        return {
            'success': True,
            'output_path': output_path,
            'fields': fields
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}