"""Custom tools for the content generation agent system.""" import xml.etree.ElementTree as ET from typing import Any import requests from duckduckgo_search import DDGS def search_papers(topic: str, max_results: int = 5) -> dict[str, Any]: """Search for academic papers and research articles on a given topic. This tool searches for recent academic papers, research articles, and scientific publications related to the specified topic. It provides summaries and links to help build credible, research-backed content. Args: topic: The research topic or subject to search for (e.g., "machine learning interpretability") max_results: Maximum number of papers to return (default: 5) Returns: A dictionary containing: - status: "success" or "error" - papers: List of paper dictionaries with title, authors, summary, link - error_message: Error description if status is "error" """ try: # Use arXiv API for academic papers # Format: http://export.arxiv.org/api/query?search_query=all:{topic}&max_results={max_results} base_url = "http://export.arxiv.org/api/query" params = { "search_query": f"all:{topic}", "max_results": max_results, "sortBy": "submittedDate", "sortOrder": "descending", } response = requests.get(base_url, params=params, timeout=10) response.raise_for_status() # Parse XML response using proper XML parser # Design decision: We use ElementTree instead of string parsing for robustness # and proper handling of XML namespaces, encoding, and malformed entries. try: root = ET.fromstring(response.content) except ET.ParseError as e: return { "status": "error", "error_message": f"Failed to parse arXiv XML response: {str(e)}", } # arXiv API uses Atom namespace namespace = {"atom": "http://www.w3.org/2005/Atom"} # Extract papers from XML entries papers = [] entries = root.findall("atom:entry", namespace) for entry in entries[:max_results]: try: # Extract title (remove extra whitespace and newlines) title_elem = entry.find("atom:title", namespace) title = ( " ".join(title_elem.text.strip().split()) if title_elem is not None else "Untitled" ) # Extract summary (limit to 300 chars for readability) summary_elem = entry.find("atom:summary", namespace) if summary_elem is not None: summary = " ".join(summary_elem.text.strip().split()) summary = summary[:300] + ("..." if len(summary) > 300 else "") else: summary = "No summary available" # Extract paper ID/link id_elem = entry.find("atom:id", namespace) link = id_elem.text.strip() if id_elem is not None else "" # Extract authors (first 3 authors for brevity) authors = [] author_elems = entry.findall("atom:author", namespace) for author_elem in author_elems[:3]: name_elem = author_elem.find("atom:name", namespace) if name_elem is not None: authors.append(name_elem.text.strip()) papers.append( { "title": title, "authors": ", ".join(authors) if authors else "Unknown", "summary": summary, "link": link, } ) except Exception: # Skip malformed entries but continue processing continue if not papers: return {"status": "error", "error_message": f"No papers found for topic: {topic}"} return {"status": "success", "papers": papers, "count": len(papers)} except requests.RequestException as e: return {"status": "error", "error_message": f"Failed to search papers: {str(e)}"} except Exception as e: return {"status": "error", "error_message": f"Unexpected error: {str(e)}"} def search_web(query: str, max_results: int = 5) -> dict[str, Any]: """Search the web for information using DuckDuckGo. Use this tool to find: - Recent news and industry trends - Blog posts and technical articles - Company information and market data - Real-world examples and case studies Args: query: The search query max_results: Maximum number of results to return (default: 5) Returns: A dictionary containing: - status: "success" or "error" - results: List of search results (title, link, snippet) - error_message: Error description if status is "error" """ try: with DDGS() as ddgs: results = list(ddgs.text(query, max_results=max_results)) if not results: return {"status": "success", "results": [], "count": 0} formatted_results = [] for r in results: formatted_results.append( { "title": r.get("title", ""), "link": r.get("href", ""), "snippet": r.get("body", ""), } ) return {"status": "success", "results": formatted_results, "count": len(formatted_results)} except Exception as e: return {"status": "error", "error_message": f"Web search error: {str(e)}"} def format_for_platform(content: str, platform: str, topic: str = "") -> dict[str, Any]: """Format content appropriately for different social media platforms. Adjusts content length, structure, and style based on platform requirements: - Blog: Long-form, structured with headings (1000-2000 words) - LinkedIn: Professional, medium-length with key takeaways (300-800 words) - Twitter: Concise thread format, engaging hooks (280 chars per tweet) Args: content: The raw content to format platform: Target platform ("blog", "linkedin", or "twitter") topic: Optional topic for context (used for hashtags, etc.) Returns: A dictionary containing: - status: "success" or "error" - formatted_content: Platform-optimized content - metadata: Platform-specific metadata (hashtags, structure, etc.) - error_message: Error description if status is "error" """ try: platform = platform.lower() if platform not in ["blog", "linkedin", "twitter"]: return { "status": "error", "error_message": f"Unsupported platform: {platform}. Use 'blog', 'linkedin', or 'twitter'.", } metadata = {} if platform == "blog": # Blog: Add structure with markdown metadata = { "format": "markdown", "target_length": "1000-2000 words", "structure": "Title โ†’ Introduction โ†’ Main sections with H2/H3 โ†’ Conclusion โ†’ References", } formatted = f"""# {topic if topic else "Article Title"} {content} ## References [Add citations here] """ elif platform == "linkedin": # LinkedIn: Professional tone with emojis and key takeaways metadata = { "format": "plain text with limited formatting", "target_length": "300-800 words", "best_practices": "Start with hook, use line breaks, end with call-to-action", } # Add structure formatted = f"""๐Ÿ”ฌ {topic if topic else "Professional Insight"} {content} ๐Ÿ’ก Key Takeaways: [Summarize 3-5 bullet points] What are your thoughts? Share in the comments below! ๐Ÿ‘‡ #Research #Science #Innovation """ elif platform == "twitter": # Twitter: Break into thread metadata = { "format": "thread (multiple tweets)", "target_length": "280 characters per tweet", "best_practices": "Number tweets (1/n), use hooks, add relevant hashtags", } # Basic thread structure formatted = f"""๐Ÿงต Thread: {topic if topic else "Key Insights"} 1/๐Ÿงต {content[:250]}... [Continue thread - AI will expand this into full thread] #Research #Science """ return { "status": "success", "formatted_content": formatted, "platform": platform, "metadata": metadata, } except Exception as e: return {"status": "error", "error_message": f"Formatting error: {str(e)}"} def generate_citations(sources: list[dict[str, str]], style: str = "apa") -> dict[str, Any]: """Generate properly formatted citations from source information. Creates academic-style citations from paper/article metadata to ensure content credibility and proper attribution. Args: sources: List of source dictionaries with keys: title, authors, link, year (optional) style: Citation style ("apa", "mla", or "chicago") - default is "apa" Returns: A dictionary containing: - status: "success" or "error" - citations: List of formatted citation strings - inline_format: Example of how to cite inline - error_message: Error description if status is "error" """ try: if not sources: return {"status": "error", "error_message": "No sources provided for citation"} style = style.lower() if style not in ["apa", "mla", "chicago"]: style = "apa" # Default to APA citations = [] for i, source in enumerate(sources, 1): title = source.get("title", "Untitled") authors = source.get("authors", "Unknown") link = source.get("link", "") year = source.get("year", "n.d.") if style == "apa": # APA: Authors (Year). Title. Retrieved from URL citation = f"{authors} ({year}). {title}. {link}" elif style == "mla": # MLA: Authors. "Title." Web. URL citation = f'{authors}. "{title}." Web. {link}' else: # chicago # Chicago: Authors. "Title." Accessed URL citation = f'{authors}. "{title}." {link}' citations.append(f"[{i}] {citation}") inline_format = {"apa": "(Author, Year)", "mla": "(Author)", "chicago": "(Author Year)"} return { "status": "success", "citations": citations, "style": style, "inline_format": inline_format.get(style, "(Author, Year)"), "count": len(citations), } except Exception as e: return {"status": "error", "error_message": f"Citation generation error: {str(e)}"} def extract_key_findings(research_text: str, max_findings: int = 5) -> dict[str, Any]: """Extract key findings and insights from research text. Parses research summaries to identify the most important findings, conclusions, and actionable insights for content creation. Args: research_text: Raw research text to analyze max_findings: Maximum number of key findings to extract (default: 5) Returns: A dictionary containing: - status: "success" or "error" - findings: List of key finding strings - summary: Brief overall summary - error_message: Error description if status is "error" """ try: if not research_text or len(research_text.strip()) < 50: return {"status": "error", "error_message": "Insufficient research text provided"} # Simple keyword-based extraction (in production, use NLP/LLM) sentences = research_text.replace("\n", " ").split(". ") # Look for sentences with key indicator words indicators = [ "found", "discovered", "showed", "demonstrated", "revealed", "concluded", "suggests", "indicates", "proves", "confirms", "important", "significant", "key", "main", "primary", ] findings = [] for sentence in sentences: sentence = sentence.strip() if any(indicator in sentence.lower() for indicator in indicators): findings.append(sentence if sentence.endswith(".") else sentence + ".") if len(findings) >= max_findings: break # If not enough findings, take first few substantial sentences if len(findings) < max_findings: for sentence in sentences: sentence = sentence.strip() if len(sentence) > 30 and sentence not in findings: findings.append(sentence if sentence.endswith(".") else sentence + ".") if len(findings) >= max_findings: break summary = f"Analysis of research text identified {len(findings)} key findings and insights." return { "status": "success", "findings": findings[:max_findings], "summary": summary, "count": len(findings[:max_findings]), } except Exception as e: return {"status": "error", "error_message": f"Key finding extraction error: {str(e)}"} def search_industry_trends( field: str, region: str = "global", max_results: int = 5 ) -> dict[str, Any]: """Search for industry trends, job market demands, and hiring patterns in AI/ML. Identifies what companies are looking for, hot skills in demand, and industry pain points that professionals can address. Useful for aligning content with market opportunities. Args: field: The AI/ML field to analyze (e.g., "Machine Learning", "NLP", "Computer Vision") region: Geographic region for job market analysis (default: "global") max_results: Maximum number of trends to return (default: 5) Returns: A dictionary containing: - status: "success" or "error" - trends: List of current industry trends and demands - hot_skills: Technologies/frameworks in high demand - pain_points: Common business challenges to address - error_message: Error description if status is "error" """ try: # Use search_web to find real trends search_query = f"latest trends in {field} {region} {2024}" # We'll use the newly created search_web function # Note: In a real circular dependency scenario, we might need to handle imports differently, # but here they are in the same file. search_results = search_web(search_query, max_results=max_results) if search_results.get("status") == "error": return search_results results = search_results.get("results", []) trends = [] for r in results: trends.append(f"{r['title']}: {r['snippet']}") if not trends: # Fallback if search fails to return good results trends = [ f"Growing demand for {field} expertise in {region}", f"Companies seeking production-ready {field} solutions", "Emphasis on practical implementation over pure research", ] # Basic skill mapping is still useful as a baseline skill_mapping = { "machine learning": ["PyTorch", "TensorFlow", "Scikit-learn", "MLflow", "Kubeflow"], "nlp": ["Transformers", "LangChain", "OpenAI API", "HuggingFace", "spaCy"], "computer vision": ["OpenCV", "YOLO", "SAM", "Detectron2", "PIL"], "llm": ["LangChain", "LlamaIndex", "Vector Databases", "Prompt Engineering", "RAG"], "mlops": ["MLflow", "Kubeflow", "Docker", "Kubernetes", "AWS SageMaker"], } field_lower = field.lower() hot_skills = [] for key in skill_mapping: if key in field_lower: hot_skills.extend(skill_mapping[key][:3]) if not hot_skills: hot_skills = ["Python", "PyTorch", "Cloud Platforms", "API Development"] pain_points = [ f"Difficulty finding experienced {field} professionals", f"Bridging gap between research papers and production code in {field}", f"Scaling {field} solutions from prototype to enterprise", f"Explaining ROI of {field} investments to executives", f"Maintaining and monitoring {field} systems in production", ] return { "status": "success", "trends": trends[:max_results], "hot_skills": list(set(hot_skills)), "pain_points": pain_points[:max_results], "region": region, "field": field, } except Exception as e: return {"status": "error", "error_message": f"Industry trends search error: {str(e)}"} def generate_seo_keywords(topic: str, role: str = "AI Consultant") -> dict[str, Any]: """Generate LinkedIn SEO keywords that recruiters search for. Creates role-specific keywords and technology terms that improve visibility in recruiter searches and LinkedIn's algorithm. Args: topic: The content topic or expertise area role: Target professional role (e.g., "AI Consultant", "ML Engineer") Returns: A dictionary containing: - status: "success" or "error" - primary_keywords: Main role-based keywords - technical_keywords: Technology and framework terms - action_keywords: Skill-based action verbs - combined_phrases: Optimized keyword combinations - error_message: Error description if status is "error" """ try: # Role-based keywords role_keywords = { "consultant": ["AI Consultant", "ML Consultant", "AI Strategy", "Technical Advisor"], "engineer": ["ML Engineer", "AI Engineer", "Machine Learning Engineer"], "specialist": ["AI Specialist", "ML Specialist", "Data Science Specialist"], "expert": ["AI Expert", "ML Expert", "Subject Matter Expert"], "architect": ["AI Architect", "ML Architect", "Solutions Architect"], } role_lower = role.lower() primary_keywords = [role] for key in role_keywords: if key in role_lower: primary_keywords.extend(role_keywords[key][:2]) # Technical keywords based on topic technical_keywords = [] topic_lower = topic.lower() tech_mapping = { "language": ["NLP", "LLM", "Transformers", "GPT", "BERT"], "vision": ["Computer Vision", "CNN", "Object Detection", "Image Recognition"], "learning": ["Deep Learning", "Neural Networks", "PyTorch", "TensorFlow"], "agent": ["AI Agents", "Multi-Agent Systems", "LangChain", "Autonomous Systems"], "data": ["Data Science", "Feature Engineering", "Model Training"], } for key in tech_mapping: if key in topic_lower: technical_keywords.extend(tech_mapping[key][:3]) if not technical_keywords: technical_keywords = ["Machine Learning", "Artificial Intelligence", "Python"] # Action keywords (skills) action_keywords = [ "AI Development", "Model Deployment", "MLOps", "Production ML", "Algorithm Design", "Technical Leadership", "AI Strategy", ] # Combined optimized phrases combined_phrases = [ f"{primary_keywords[0]} | {technical_keywords[0]}", f"Expert in {technical_keywords[0]} and {technical_keywords[1] if len(technical_keywords) > 1 else 'ML'}", f"{action_keywords[0]} | {action_keywords[1]}", ] return { "status": "success", "primary_keywords": list(set(primary_keywords))[:5], "technical_keywords": list(set(technical_keywords))[:5], "action_keywords": action_keywords[:5], "combined_phrases": combined_phrases, "total_keywords": len(set(primary_keywords + technical_keywords + action_keywords)), } except Exception as e: return {"status": "error", "error_message": f"SEO keyword generation error: {str(e)}"} def create_engagement_hooks(topic: str, goal: str = "opportunities") -> dict[str, Any]: """Create engagement hooks that invite professional connections and opportunities. Generates calls-to-action, questions, and portfolio mentions that encourage recruiters and potential clients to connect. Args: topic: The content topic goal: Content goal ("opportunities", "discussion", "credibility", "visibility") Returns: A dictionary containing: - status: "success" or "error" - opening_hooks: Attention-grabbing opening lines - closing_ctas: Strong calls-to-action - discussion_questions: Questions that spark engagement - portfolio_prompts: Ways to mention your work - error_message: Error description if status is "error" """ try: goal = goal.lower() # Opening hooks based on goal opening_hooks = { "opportunities": [ f"Working with companies on {topic}? Here's what I've learned...", f"After implementing {topic} for multiple clients, one thing is clear:", f"Most {topic} projects fail because of this one mistake:", ], "discussion": [ f"Hot take on {topic}:", f"Here's what nobody tells you about {topic}:", f"The {topic} landscape just shifted. Here's why it matters:", ], "credibility": [ f"Deep dive into {topic} based on hands-on experience:", f"Technical breakdown of {topic} that actually works in production:", f"What I learned implementing {topic} at scale:", ], "visibility": [ f"๐Ÿ”ฅ {topic} is evolving faster than ever. Here's what you need to know:", f"Everyone's talking about {topic}, but here's what they're missing:", f"3 things about {topic} that changed how I work:", ], } # Closing CTAs based on goal closing_ctas = { "opportunities": [ "Looking to implement this in your organization? Let's connect and discuss your needs.", "Need help with your {topic} project? DM me to explore collaboration.", "Building something similar? I'd love to hear about your approach. Drop a comment or message me.", ], "discussion": [ "What's your take on this? Agree or disagree? Let's discuss in the comments!", "Have you encountered this in your work? Share your experience below.", "Curious how this applies to your use case? Let's chat!", ], "credibility": [ "Want to dive deeper into the technical details? Connect with me.", "Questions about the implementation? Happy to share insights.", "Follow for more technical deep-dives on {topic}.", ], "visibility": [ "๐Ÿ”” Follow for more insights on {topic} and AI/ML trends.", "๐Ÿ‘‰ Repost if you found this valuable. Tag someone who needs to see this.", "๐Ÿ’ฌ What would you add to this list? Comment below!", ], } # Discussion questions discussion_questions = [ f"What's been your biggest challenge with {topic}?", f"Are you seeing similar trends with {topic} in your industry?", f"Which aspect of {topic} should I cover next?", f"What's your hot take on the future of {topic}?", f"Have you tried implementing {topic}? What were your results?", ] # Portfolio prompts portfolio_prompts = [ f"In my recent project on {topic}, I discovered...", f"While building a {topic} solution, here's what worked:", f"My open-source work on {topic} taught me...", f"Check out my GitHub for {topic} implementations that...", f"Drawing from my Kaggle competition on {topic}...", ] return { "status": "success", "opening_hooks": opening_hooks.get(goal, opening_hooks["credibility"])[:3], "closing_ctas": [ cta.replace("{topic}", topic) for cta in closing_ctas.get(goal, closing_ctas["opportunities"])[:3] ], "discussion_questions": discussion_questions[:3], "portfolio_prompts": portfolio_prompts[:3], "goal": goal, } except Exception as e: return {"status": "error", "error_message": f"Engagement hook creation error: {str(e)}"} def analyze_content_for_opportunities( content: str, target_role: str = "AI Consultant" ) -> dict[str, Any]: """Analyze content for recruiter appeal and opportunity generation potential. Scores content based on factors that attract professional opportunities: SEO keywords, engagement hooks, portfolio mentions, and business value. Args: content: The content to analyze target_role: Target professional role for scoring Returns: A dictionary containing: - status: "success" or "error" - opportunity_score: Overall score (0-100) - seo_score: SEO keyword presence (0-100) - engagement_score: Engagement hook effectiveness (0-100) - value_score: Business value communication (0-100) - suggestions: List of improvement suggestions - error_message: Error description if status is "error" """ try: if not content or len(content) < 100: return { "status": "error", "error_message": "Content too short for meaningful analysis (minimum 100 characters)", } content_lower = content.lower() # SEO keyword scoring # Design decision: We check for both role-based keywords (consultant, engineer) # and technical terms (PyTorch, TensorFlow) because recruiters search using both. # The multiplier of 200 ensures that hitting ~50% of keywords gives a good score. seo_keywords = [ "ai", "machine learning", "ml", "deep learning", "neural network", "python", "tensorflow", "pytorch", "consulting", "engineer", "architect", "specialist", "expert", ] seo_hits = sum(1 for keyword in seo_keywords if keyword in content_lower) seo_score = min(100, (seo_hits / len(seo_keywords)) * 200) # Engagement hooks scoring # Design decision: We look for calls-to-action, questions, and invitation words # because these are proven to increase LinkedIn engagement and prompt connections. # Target of 5 indicators gives 100 score - this is based on LinkedIn best practices. engagement_indicators = [ "?", "let's", "connect", "dm", "message", "discuss", "share", "comment", "what's your", "have you", "follow", ] engagement_hits = sum( 1 for indicator in engagement_indicators if indicator in content_lower ) engagement_score = min(100, (engagement_hits / 5) * 100) # Business value scoring # Design decision: Recruiters and clients care about business outcomes, not just tech. # We prioritize words that show real-world impact and problem-solving ability. # This distinguishes professional content from purely academic content. value_indicators = [ "production", "scale", "roi", "business", "solution", "impact", "results", "improve", "optimize", "problem", "challenge", ] value_hits = sum(1 for indicator in value_indicators if indicator in content_lower) value_score = min(100, (value_hits / 5) * 100) # Portfolio mention detection # Design decision: Mentioning projects demonstrates hands-on experience. # This is critical for converting interest into opportunities. # We use a lower threshold (3 mentions = 100) since portfolios are mentioned sparingly. portfolio_indicators = ["project", "github", "kaggle", "built", "developed", "implemented"] portfolio_mentions = sum( 1 for indicator in portfolio_indicators if indicator in content_lower ) portfolio_score = min(100, (portfolio_mentions / 3) * 100) # Calculate overall opportunity score # Design decision: Weighted scoring gives highest priority to SEO and engagement (30% each) # because these directly impact visibility and connection rate. Business value (25%) and # portfolio (15%) are supporting factors. This weighting was designed based on LinkedIn's # algorithm priorities and recruiter behavior patterns. opportunity_score = int( seo_score * 0.3 + engagement_score * 0.3 + value_score * 0.25 + portfolio_score * 0.15 ) # Generate suggestions suggestions = [] if seo_score < 50: suggestions.append( f"Add more {target_role} keywords and technical terms for better visibility" ) if engagement_score < 50: suggestions.append( "Include stronger calls-to-action and questions to invite connections" ) if value_score < 50: suggestions.append("Emphasize business value and practical impact over pure theory") if portfolio_mentions == 0: suggestions.append( "Mention your projects or portfolio to demonstrate hands-on expertise" ) if len(content) < 300: suggestions.append( "Consider expanding content for better engagement (aim for 300+ words)" ) return { "status": "success", "opportunity_score": opportunity_score, "seo_score": int(seo_score), "engagement_score": int(engagement_score), "value_score": int(value_score), "portfolio_score": int(portfolio_score), "suggestions": suggestions if suggestions else ["Content looks great for opportunities!"], "grade": "Excellent" if opportunity_score >= 80 else "Good" if opportunity_score >= 60 else "Needs Improvement", } except Exception as e: return {"status": "error", "error_message": f"Content analysis error: {str(e)}"}