Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

App Files Community

Maheen001 committed 6 days ago

Commit

474279e

verified ·

1 Parent(s): ca3501a

Update tools/form_filler_server.py

Browse files

Files changed (1) hide show

tools/form_filler_server.py +98 -84

tools/form_filler_server.py CHANGED Viewed

@@ -1,8 +1,14 @@
 from typing import Dict, Any
 import json
 from pathlib import Path
 from docx import Document
 from openpyxl import load_workbook
 async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
@@ -34,100 +40,117 @@ async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
 async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
-    """Fill DOCX form template - Enhanced with comprehensive matching"""
     try:
         doc = Document(template_path)
         fields_filled = []
         debug_info = []
-        # Normalize data keys for better matching
         normalized_data = {}
         for key, value in data.items():
-            # Store original
-            normalized_data[key] = value
-            # Store lowercase
-            normalized_data[key.lower()] = value
-            # Store with underscores
-            normalized_data[key.lower().replace(" ", "_")] = value
-            # Store without spaces
-            normalized_data[key.lower().replace(" ", "")] = value
-        # First, try to replace placeholders in paragraphs
-        for paragraph in doc.paragraphs:
-            for key, value in normalized_data.items():
-                placeholders = [
-                    f"{{{{{key}}}}}",  # {{field_name}}
-                    f"{{{key}}}",      # {field_name}
                 ]
-                for placeholder in placeholders:
-                    if placeholder in paragraph.text:
-                        paragraph.text = paragraph.text.replace(placeholder, str(value))
                         if key not in fields_filled:
                             fields_filled.append(key)
-        # Enhanced table processing
         for table_idx, table in enumerate(doc.tables):
             for row_idx, row in enumerate(table.rows):
-                cells_text = [cell.text.strip() for cell in row.cells]
-                # Process each cell
                 for cell_idx, cell in enumerate(row.cells):
-                    cell_text = cell.text.strip().lower()
-                    # Skip empty cells
                     if not cell_text:
                         continue
-                    # Check each field
                     for original_key, value in data.items():
-                        key_lower = original_key.lower()
-                        key_normalized = key_lower.replace(" ", "").replace("_", "")
-                        cell_normalized = cell_text.replace(" ", "").replace(":", "").replace("_", "")
-                        # Method 1: Exact match (case-insensitive)
-                        if key_lower in cell_text or cell_text in key_lower:
-                            # Fill next cell if exists and is empty
-                            if cell_idx + 1 < len(row.cells):
-                                next_cell = row.cells[cell_idx + 1]
-                                if not next_cell.text.strip() or len(next_cell.text.strip()) < 3:
-                                    next_cell.text = str(value)
-                                    if original_key not in fields_filled:
-                                        fields_filled.append(original_key)
-                                        debug_info.append(f"Filled '{original_key}' in cell after '{cell.text.strip()}'")
-                            # Method 2: Fill after colon in same cell
                             if ":" in cell.text:
                                 parts = cell.text.split(":", 1)
-                                label = parts[0].strip()
-                                cell.text = f"{label}: {value}"
                                 if original_key not in fields_filled:
                                     fields_filled.append(original_key)
-                                    debug_info.append(f"Filled '{original_key}' after colon in '{label}'")
-                        # Method 3: Fuzzy match (normalized)
-                        elif key_normalized in cell_normalized or cell_normalized in key_normalized:
                             if cell_idx + 1 < len(row.cells):
                                 next_cell = row.cells[cell_idx + 1]
-                                if not next_cell.text.strip() or len(next_cell.text.strip()) < 3:
                                     next_cell.text = str(value)
                                     if original_key not in fields_filled:
                                         fields_filled.append(original_key)
-                                        debug_info.append(f"Filled '{original_key}' (fuzzy) after '{cell.text.strip()}'")
-                    # Check for placeholders
                     for key, value in normalized_data.items():
                         placeholders = [
                             f"{{{{{key}}}}}",
                             f"{{{key}}}",
                         ]
                         for placeholder in placeholders:
                             if placeholder.lower() in cell.text.lower():
                                 cell.text = cell.text.replace(placeholder, str(value))
-                                if key not in fields_filled:
-                                    fields_filled.append(key)
-                                    debug_info.append(f"Filled placeholder '{placeholder}' with '{key}'")
         # Ensure output directory exists
         Path(output_path).parent.mkdir(parents=True, exist_ok=True)
@@ -153,19 +176,36 @@ async def fill_excel_form(template_path: str, data: Dict[str, Any], output_path:
         ws = wb.active
         fields_filled = []
-        # Search for placeholders and replace
         for row in ws.iter_rows():
-            for cell in row:
                 if cell.value and isinstance(cell.value, str):
                     for key, value in data.items():
                         placeholders = [
                             f"{{{{{key}}}}}",
                             f"{{{key}}}",
                         ]
                         for placeholder in placeholders:
-                            if placeholder in cell.value:
-                                cell.value = cell.value.replace(placeholder, str(value))
                                 if key not in fields_filled:
                                     fields_filled.append(key)
@@ -196,6 +236,7 @@ async def fill_text_form(template_path: str, data: Dict[str, Any], output_path:
             placeholders = [
                 f"{{{{{key}}}}}",
                 f"{{{key}}}",
             ]
             for placeholder in placeholders:
@@ -217,33 +258,6 @@ async def fill_text_form(template_path: str, data: Dict[str, Any], output_path:
             'total_fields': len(fields_filled)
         }
-    except Exception as e:
-        import traceback
-        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
-async def generate_form_template(fields: list) -> dict:
-    """Generate a blank form template with specified fields"""
-    try:
-        doc = Document()
-        doc.add_heading('Form Template', 0)
-        for field in fields:
-            p = doc.add_paragraph()
-            p.add_run(f"{field}: ").bold = True
-            p.add_run(f"{{{{{field}}}}}")
-        # Ensure output directory exists
-        output_path = "data/outputs/form_template.docx"
-        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
-        doc.save(output_path)
-        return {
-            'success': True,
-            'output_path': output_path,
-            'fields': fields
-        }
     except Exception as e:
         import traceback
         return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}

+"""
+Form Filler Server - COMPLETE FIXED VERSION
+Handles DOCX, XLSX, and text form templates with robust field matching
+"""
 from typing import Dict, Any
 import json
 from pathlib import Path
 from docx import Document
 from openpyxl import load_workbook
+import re
 async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
 async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
+    """Fill DOCX form template - COMPLETE FIXED VERSION with robust matching"""
     try:
         doc = Document(template_path)
         fields_filled = []
         debug_info = []
+        # Normalize data keys for comprehensive matching
         normalized_data = {}
         for key, value in data.items():
+            # Create multiple normalized versions of each key
+            key_variations = [
+                key,                                    # Original: "First Name"
+                key.lower(),                           # Lowercase: "first name"
+                key.lower().replace(" ", ""),          # No spaces: "firstname"
+                key.lower().replace(" ", "_"),         # Underscores: "first_name"
+                key.replace(" ", ""),                  # No spaces original case: "FirstName"
+                key.replace(" ", "_"),                 # Underscores original case: "First_Name"
+            ]
+            for variation in key_variations:
+                normalized_data[variation] = value
+        # Process all paragraphs
+        for para_idx, paragraph in enumerate(doc.paragraphs):
+            original_text = paragraph.text
+            modified = False
+            # Try to find and replace fields in paragraphs
+            for key, value in data.items():
+                # Check for various formats
+                patterns = [
+                    (f"{key}:", f"{key}: {value}"),           # "First Name:" -> "First Name: John"
+                    (f"{key} :", f"{key}: {value}"),          # "First Name :" -> "First Name: John"
+                    (f"{key} _", f"{key}: {value}"),          # "First Name _" -> "First Name: John"
+                    (f"{key}__", f"{key}: {value}"),          # "First Name__" -> "First Name: John"
                 ]
+                for pattern, replacement in patterns:
+                    if pattern in paragraph.text:
+                        paragraph.text = paragraph.text.replace(pattern, replacement)
                         if key not in fields_filled:
                             fields_filled.append(key)
+                            debug_info.append(f"Filled '{key}' in paragraph {para_idx}")
+                        modified = True
+                        break
+                if modified:
+                    break
+        # Process all tables (primary method for form templates)
         for table_idx, table in enumerate(doc.tables):
             for row_idx, row in enumerate(table.rows):
                 for cell_idx, cell in enumerate(row.cells):
+                    cell_text = cell.text.strip()
                     if not cell_text:
                         continue
+                    # Method 1: Direct label matching with colon
                     for original_key, value in data.items():
+                        # Create normalized version of cell text
+                        cell_normalized = cell_text.lower().replace(":", "").strip()
+                        key_normalized = original_key.lower().replace(":", "").strip()
+                        # Check if cell contains the field label
+                        if key_normalized == cell_normalized or cell_normalized.startswith(key_normalized):
+                            # Check if value is already filled
                             if ":" in cell.text:
                                 parts = cell.text.split(":", 1)
+                                if len(parts) == 2 and not parts[1].strip():
+                                    # Empty after colon, fill it
+                                    cell.text = f"{parts[0]}: {value}"
+                                    if original_key not in fields_filled:
+                                        fields_filled.append(original_key)
+                                        debug_info.append(f"Filled '{original_key}' in table {table_idx}, row {row_idx}, cell {cell_idx}")
+                            else:
+                                # No colon, add it
+                                cell.text = f"{cell.text}: {value}"
                                 if original_key not in fields_filled:
                                     fields_filled.append(original_key)
+                                    debug_info.append(f"Filled '{original_key}' in table {table_idx}, row {row_idx}, cell {cell_idx}")
+                        # Method 2: Check next cell in same row
+                        elif key_normalized in cell_normalized:
                             if cell_idx + 1 < len(row.cells):
                                 next_cell = row.cells[cell_idx + 1]
+                                # Only fill if next cell is empty or has placeholder
+                                if not next_cell.text.strip() or next_cell.text.strip() in ["_", "__", "___", ""]:
                                     next_cell.text = str(value)
                                     if original_key not in fields_filled:
                                         fields_filled.append(original_key)
+                                        debug_info.append(f"Filled '{original_key}' in next cell of table {table_idx}, row {row_idx}")
+                    # Method 3: Check for placeholder patterns in cells
                     for key, value in normalized_data.items():
                         placeholders = [
                             f"{{{{{key}}}}}",
                             f"{{{key}}}",
+                            f"[{key}]",
                         ]
                         for placeholder in placeholders:
                             if placeholder.lower() in cell.text.lower():
                                 cell.text = cell.text.replace(placeholder, str(value))
+                                # Find original key
+                                original_key = next((k for k in data.keys()
+                                                   if k.lower().replace(" ", "") == key.lower().replace(" ", "")),
+                                                  key)
+                                if original_key not in fields_filled:
+                                    fields_filled.append(original_key)
+                                    debug_info.append(f"Filled placeholder '{placeholder}' in table {table_idx}")
         # Ensure output directory exists
         Path(output_path).parent.mkdir(parents=True, exist_ok=True)
         ws = wb.active
         fields_filled = []
+        # Iterate through all cells
         for row in ws.iter_rows():
+            for cell_idx, cell in enumerate(row):
                 if cell.value and isinstance(cell.value, str):
+                    cell_text = str(cell.value).strip()
+                    # Check each field
                     for key, value in data.items():
+                        key_normalized = key.lower().replace(":", "").strip()
+                        cell_normalized = cell_text.lower().replace(":", "").strip()
+                        # If cell contains field label
+                        if key_normalized == cell_normalized or key_normalized in cell_normalized:
+                            # Try to fill next cell
+                            next_cell = row[cell_idx + 1] if cell_idx + 1 < len(row) else None
+                            if next_cell and (not next_cell.value or str(next_cell.value).strip() == ""):
+                                next_cell.value = str(value)
+                                if key not in fields_filled:
+                                    fields_filled.append(key)
+                        # Check for placeholders
                         placeholders = [
                             f"{{{{{key}}}}}",
                             f"{{{key}}}",
+                            f"[{key}]",
                         ]
                         for placeholder in placeholders:
+                            if placeholder in cell_text:
+                                cell.value = cell_text.replace(placeholder, str(value))
                                 if key not in fields_filled:
                                     fields_filled.append(key)
             placeholders = [
                 f"{{{{{key}}}}}",
                 f"{{{key}}}",
+                f"[{key}]",
             ]
             for placeholder in placeholders:
             'total_fields': len(fields_filled)
         }
     except Exception as e:
         import traceback
         return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}