Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Template Analyzer Agent | |
| Analyzes Word document templates to extract structure and sections | |
| """ | |
| import os | |
| import re | |
| from typing import Dict, Any | |
| from docx import Document | |
| from langchain.tools import tool | |
| from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| from langchain.agents import AgentExecutor, create_openai_tools_agent | |
def analyze_word_template(template_path: str) -> Dict[str, Any]:
    """Analyze a Word document template to extract structure and sections.

    Args:
        template_path: Filesystem path of the Word template to inspect.

    Returns:
        A dict with three keys:

        * ``'sections'``: list of ``{'text', 'index', 'style'}`` dicts, one
          per non-empty paragraph whose text contains a section keyword.
        * ``'formatting'``: dict keyed by paragraph index holding the bold,
          italic, font name and font size (in points, or ``None``) of the
          paragraph's first run, plus the paragraph alignment.
        * ``'document_info'``: title, author and subject taken from the
          document core properties, or an empty dict when none is set.

    Raises:
        FileNotFoundError: If ``template_path`` does not exist.
    """
    if not os.path.exists(template_path):
        raise FileNotFoundError(f"Template file not found: {template_path}")

    # Compiled once instead of being rebuilt for every paragraph.  French
    # keywords are listed with and without accents; the unaccented
    # 'materiel' replaces what used to be a duplicated 'matériel' entry.
    section_keywords = re.compile(
        r'\b(examen|observation|conclusion|résultat|resultat|diagnostic'
        r'|rapport|échographie|echographie|analyse|commentaire'
        r'|recommandation|technique|matériel|materiel|méthode|indication)\b',
        re.IGNORECASE,
    )

    doc = Document(template_path)
    analysis: Dict[str, Any] = {
        'sections': [],
        'formatting': {},
        'document_info': {},
    }

    # Analyze paragraphs and sections
    for i, paragraph in enumerate(doc.paragraphs):
        text = paragraph.text.strip()
        if not text:
            continue

        # Detect sections: any paragraph mentioning a known keyword.
        if section_keywords.search(text):
            analysis['sections'].append({
                'text': text,
                'index': i,
                'style': paragraph.style.name if paragraph.style else 'Normal',
            })

        # Analyze formatting, using the first run as representative.
        # NOTE(review): the listing this was recovered from had lost its
        # indentation; formatting capture is assumed to apply to non-empty
        # paragraphs only -- confirm against the original file.
        runs = paragraph.runs
        if runs:
            run = runs[0]
            analysis['formatting'][i] = {
                'bold': run.bold,
                'italic': run.italic,
                'font_name': run.font.name,
                'font_size': run.font.size.pt if run.font.size else None,
                'alignment': paragraph.alignment,
            }

    # Analyze document properties.  Report them when any of the three is
    # set, so author/subject are not dropped just because the title is empty.
    props = doc.core_properties
    if props.title or props.author or props.subject:
        analysis['document_info'] = {
            'title': props.title,
            'author': props.author,
            'subject': props.subject,
        }

    return analysis
def create_template_analyzer_agent(llm):
    """Create the template analyzer agent.

    Args:
        llm: Chat model passed to ``create_openai_tools_agent``.

    Returns:
        An ``AgentExecutor`` (verbose) whose only tool is
        ``analyze_word_template`` and whose prompt expects a
        ``template_path`` input variable.
    """
    # AgentExecutor works with tool objects rather than bare callables, so
    # wrap the analyzer with the `tool` decorator unless it already is one.
    # A plain function has `__name__` but no `name` attribute, which is what
    # distinguishes it from a tool here.
    analyzer_tool = analyze_word_template
    if not hasattr(analyzer_tool, "name"):
        analyzer_tool = tool(analyzer_tool)
    tools = [analyzer_tool]

    template_analyzer_prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            "You are a medical document template analyzer.\n"
            "Analyze the provided Word template and extract its structure, "
            "sections, and formatting.\n"
            "Provide a detailed analysis that can be used by other agents.",
        ),
        (
            "human",
            "Analyze the template at {template_path} and provide a "
            "comprehensive analysis.",
        ),
        # Holds the intermediate tool calls/results between agent steps.
        MessagesPlaceholder("agent_scratchpad"),
    ])

    template_analyzer_agent = create_openai_tools_agent(
        llm=llm,
        tools=tools,
        prompt=template_analyzer_prompt,
    )

    # The same tool list is given to the agent (for its tool schema) and to
    # the executor (to actually run the calls).
    template_analyzer_executor = AgentExecutor(
        agent=template_analyzer_agent,
        tools=tools,
        verbose=True,
    )
    return template_analyzer_executor