import asyncio
from typing import Any, Dict, List

import requests

from python.helpers.tool import Tool, Response

# The base URL of the public SearXNG instance.
# WARNING: Using a public, third-party instance is not recommended for production
# due to significant reliability, security, and privacy risks.
SEARXNG_BASE_URL = "https://CJJ-on-HF-SearXNG.hf.space"

# Seconds to wait for the search service; generous because it is a public instance.
_REQUEST_TIMEOUT_SECONDS = 15

# Reported whenever the service answers with something that is not the expected JSON.
_MALFORMED_RESPONSE_MESSAGE = (
    "Error: Failed to parse a valid JSON response from the search service. "
    "The service might be down or returning malformed data."
)


class SearchEngine(Tool):
    """A tool to perform web searches using a public SearXNG instance."""

    async def execute(
        self, query: str, category: str = "general", num_results: int = 5
    ) -> Response:
        """Run a web search against the SearXNG instance and return formatted results.

        Args:
            query: The search query string. Empty or whitespace-only queries
                are rejected.
            category: The SearXNG category to search in (e.g. 'science', 'it',
                'news'). Any value other than 'general' (or an empty value) is
                sent as a ``!category`` prefix on the query.
            num_results: The maximum number of results to include. Must be
                convertible to an integer >= 1.

        Returns:
            A Response (always with ``break_loop=False``) whose message is
            either the formatted results or a human-readable error
            description. Network, HTTP and JSON-decoding errors are reported
            in the message rather than raised.
        """
        if not query or not str(query).strip():
            return Response(message="Error: The search query cannot be empty.", break_loop=False)

        # Tool arguments may arrive as strings; coerce and reject values that
        # would make the slice below misbehave (negative slices drop results
        # from the end, zero yields an empty message).
        try:
            limit = int(num_results)
        except (TypeError, ValueError):
            limit = 0
        if limit < 1:
            return Response(message="Error: num_results must be a positive integer.", break_loop=False)

        # Construct the query with a category prefix if specified.
        search_query = f"!{category} {query}" if category and category != "general" else query
        params = {
            "q": search_query,
            "format": "json",  # Essential for machine-readable output
            "pageno": 1,
        }

        try:
            # requests is synchronous; run it in the default executor so this
            # coroutine does not block the event loop while waiting.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, self._fetch, params)
        except requests.exceptions.Timeout:
            return Response(
                message="Error: The search request timed out. The SearXNG instance may be offline or overloaded.",
                break_loop=False,
            )
        except requests.exceptions.RequestException as e:
            return Response(
                message=f"Error: A network error occurred while contacting the search service: {e}",
                break_loop=False,
            )

        # Decode separately from the request: in recent requests versions the
        # JSON decode error is also a RequestException, so a shared try block
        # would report it as a network error.
        try:
            data = response.json()
        except ValueError:
            return Response(message=_MALFORMED_RESPONSE_MESSAGE, break_loop=False)

        if not isinstance(data, dict):
            return Response(message=_MALFORMED_RESPONSE_MESSAGE, break_loop=False)

        results: List[Dict[str, Any]] = data.get("results") or []
        if not results:
            return Response(message=f"No search results found for the query: '{query}'", break_loop=False)

        return Response(message=self._format_results(results[:limit]), break_loop=False)

    @staticmethod
    def _fetch(params: Dict[str, Any]) -> "requests.Response":
        """Perform the blocking GET against the search endpoint.

        Raises:
            requests.exceptions.RequestException: On connection problems,
                timeouts, or a 4xx/5xx status (via ``raise_for_status``).
        """
        response = requests.get(
            f"{SEARXNG_BASE_URL}/search",
            params=params,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response

    @staticmethod
    def _format_results(results: List[Dict[str, Any]]) -> str:
        """Render result dicts as numbered Title/URL/Snippet entries joined by '---'."""
        entries = []
        for position, res in enumerate(results, start=1):
            # `or` also covers keys that are present but None/empty.
            title = res.get("title") or "No Title Provided"
            url = res.get("url") or "No URL Provided"
            snippet = res.get("content") or res.get("description")
            # Collapse all whitespace runs so the snippet is a single clean line.
            clean_snippet = " ".join(str(snippet).split()) if snippet else ""
            entries.append(
                f"Result {position}:\n"
                f"  Title: {title}\n"
                f"  URL: {url}\n"
                f"  Snippet: {clean_snippet or 'No Snippet Provided'}"
            )
        return "\n---\n".join(entries)