Spaces:
Running
Running
"""
🤖 Fagun Browser Automation Testing Agent - Site Audit
======================================================
Site-wide intelligent audit: crawl pages, detect broken links, collect console/network errors,
run form tests where applicable, and generate an aggregated report.
"""
import asyncio
import logging
from collections import deque
from typing import Any, Dict, List, Set, Tuple
from urllib.parse import urljoin, urlparse

from playwright.async_api import Page

from src.utils.intelligent_form_testing import IntelligentFormTester
logger = logging.getLogger(__name__)


class SiteAuditor:
    """Breadth-first, same-origin site crawler that aggregates audit findings.

    For each visited page the auditor records the title, the text of visible
    error banners, and broken same-origin links; when the page contains a
    ``<form>`` it also runs the form tester produced by
    ``form_tester_factory``.

    Notes:
        * ``console_errors`` holds text scraped from DOM error containers
          (``.error``, ``.alert-danger``, ``[role='alert']``), not messages
          from the browser console.
        * ``network_issues`` is always an empty list; nothing in this class
          fills it.
    """

    # Upper bound on anchors read from a single page.
    MAX_ANCHORS_PER_PAGE = 200
    # Upper bound on links whose status is probed per page (keeps runtime bounded).
    MAX_LINK_CHECKS_PER_PAGE = 50
    # Synthetic status reported when a link request fails without a response.
    REQUEST_FAILED_STATUS = 599

    def __init__(self, page: "Page", form_tester_factory):
        """Store the collaborators used during the audit.

        Args:
            page: A Playwright ``Page`` bound to a ``BrowserContext``. The
                annotation is quoted so that defining the class does not
                require Playwright to be importable.
            form_tester_factory: Callable that returns an
                ``IntelligentFormTester`` for a given page.
        """
        self.page = page
        self.form_tester_factory = form_tester_factory

    async def audit(
        self,
        start_url: str,
        max_pages: int = 10,
        max_depth: int = 2,
        settle_delay: float = 0.5,
    ) -> Dict[str, Any]:
        """Crawl from ``start_url`` and return an aggregated report.

        Args:
            start_url: URL where the crawl starts; only links sharing its
                origin (scheme + host[:port]) are followed or checked.
            max_pages: Maximum number of pages to visit. Pages that fail to
                load count towards this limit.
            max_depth: Maximum link distance from ``start_url``.
            settle_delay: Seconds to wait after each navigation before
                inspecting the page. Values ``<= 0`` skip the wait.

        Returns:
            A dict with the keys ``start_url``, ``total_pages_visited``,
            ``pages`` (one summary dict per visited page, or
            ``{"url", "error"}`` when processing the page failed) and
            ``broken_links`` (dicts with ``href``, ``status``, ``on_page``).
        """
        origin = self._origin(start_url)
        first_url = self._strip_fragment(start_url)

        visited: Set[str] = set()
        # Every URL ever enqueued, so the queue never holds duplicates.
        queued: Set[str] = {first_url}
        # FIFO of (url, depth) pairs; deque gives O(1) pops from the left.
        queue = deque([(first_url, 0)])
        # Link status memo: shared navigation links are requested only once.
        status_cache: Dict[str, int] = {}

        pages_summary: List[Dict[str, Any]] = []
        broken_links: List[Dict[str, str]] = []

        while queue and len(visited) < max_pages:
            url, depth = queue.popleft()
            if url in visited or depth > max_depth:
                continue
            visited.add(url)

            try:
                await self.page.goto(url, wait_until="domcontentloaded")
                if settle_delay > 0:
                    await asyncio.sleep(settle_delay)

                page_result: Dict[str, Any] = {
                    "url": url,
                    "title": await self.page.title(),
                    "console_errors": await self._collect_console_errors(),
                    "network_issues": [],
                }

                # Links are extracted before form testing runs, because the
                # form scenarios may change or navigate the page.
                links = self._same_origin_targets(
                    await self._extract_links(), origin
                )

                for link in links[: self.MAX_LINK_CHECKS_PER_PAGE]:
                    if link not in status_cache:
                        status_cache[link] = await self._head_status(link)
                    status = status_cache[link]
                    if status >= 400:
                        broken_links.append(
                            {"href": link, "status": str(status), "on_page": url}
                        )

                await self._run_form_checks(page_result)
                pages_summary.append(page_result)

                # Children deeper than max_depth would be discarded on pop,
                # so they are not enqueued at all.
                if depth < max_depth:
                    for link in links:
                        if link not in queued:
                            queued.add(link)
                            queue.append((link, depth + 1))
            except Exception as e:
                # Best-effort crawl: record the failure and keep going.
                logger.warning("Audit navigation error at %s: %s", url, e)
                pages_summary.append({"url": url, "error": str(e)})

        return {
            "start_url": start_url,
            "total_pages_visited": len(visited),
            "pages": pages_summary,
            "broken_links": broken_links,
        }

    async def _run_form_checks(self, page_result: Dict[str, Any]) -> None:
        """Run form and basic accessibility tests when the page has a form.

        On success ``form_testing`` and ``accessibility`` are added to
        ``page_result``; if any tester step raises, only
        ``form_testing_error`` is added.
        """
        if await self.page.locator("form").count() == 0:
            return

        tester = self.form_tester_factory(self.page)
        try:
            await tester.discover_form_fields()
            scenarios = await tester.generate_test_scenarios()
            await tester.execute_test_scenarios(scenarios)
            form_report = await tester.generate_comprehensive_report()
            a11y = await tester.run_basic_accessibility_checks()
            page_result["form_testing"] = form_report
            page_result["accessibility"] = a11y
        except Exception as e:
            page_result["form_testing_error"] = str(e)

    @staticmethod
    def _strip_fragment(url: str) -> str:
        """Return ``url`` without its ``#fragment`` part.

        Fragments address a location inside a document, so two URLs that
        differ only by fragment refer to the same page.
        """
        return url.split("#", 1)[0]

    def _same_origin_targets(self, links: List[str], origin: str) -> List[str]:
        """Normalise ``links`` and keep unique same-origin ones, in order.

        Args:
            links: Absolute URLs as found on the page.
            origin: Origin string as produced by ``_origin``.

        Returns:
            Fragment-free URLs whose origin equals ``origin``, de-duplicated
            while preserving first-seen order.
        """
        unique: Dict[str, None] = {}
        for link in links:
            target = self._strip_fragment(link)
            try:
                same_origin = self._origin(target) == origin
            except ValueError:
                # urlparse rejects some malformed URLs; skip those.
                continue
            if same_origin:
                unique.setdefault(target, None)
        return list(unique)

    async def _extract_links(self) -> List[str]:
        """Return absolute URLs for the ``<a href>`` elements on the page.

        At most ``MAX_ANCHORS_PER_PAGE`` anchors are read. Relative hrefs
        are resolved against the current page URL.
        """
        anchors = await self.page.locator("a[href]").all()
        base = self.page.url
        urls: List[str] = []
        for anchor in anchors[: self.MAX_ANCHORS_PER_PAGE]:
            try:
                href = await anchor.get_attribute("href")
                if href:
                    urls.append(urljoin(base, href))
            except Exception:
                # Best-effort: skip anchors that cannot be read or resolved.
                continue
        return urls

    async def _head_status(self, url: str) -> int:
        """Return the HTTP status for ``url``.

        Despite the name, this issues a GET through the browser context's
        request API, following at most two redirects.

        Returns:
            The response status, or ``REQUEST_FAILED_STATUS`` (599) when
            the request raises.
        """
        try:
            resp = await self.page.context.request.get(url, max_redirects=2)
            status = resp.status
        except Exception as e:
            logger.debug("Link check failed for %s: %s", url, e)
            return self.REQUEST_FAILED_STATUS

        # Release the response body now; otherwise it stays in memory until
        # the browser context is closed.
        try:
            await resp.dispose()
        except Exception as e:
            logger.debug("Could not dispose response for %s: %s", url, e)
        return status

    async def _collect_console_errors(self) -> List[str]:
        """Return the text of error banners currently present in the DOM.

        Heuristic: elements matching ``.error``, ``.alert-danger`` or
        ``[role='alert']``. Blank texts are skipped. A failure to query the
        page yields an empty list.
        """
        errors: List[str] = []
        try:
            banners = await self.page.locator(
                ".error, .alert-danger, [role='alert']"
            ).all()
        except Exception as e:
            logger.debug("Could not query error containers: %s", e)
            return errors

        for banner in banners:
            try:
                raw = await banner.text_content()
            except Exception:
                # Best-effort: skip elements whose text cannot be read.
                continue
            text = (raw or "").strip()
            if text:
                errors.append(text)
        return errors

    def _origin(self, url: str) -> str:
        """Return ``scheme://netloc`` for ``url`` with the netloc lower-cased.

        Host names are case-insensitive, so lower-casing keeps
        ``https://Example.com`` and ``https://example.com`` in the same
        origin.
        """
        u = urlparse(url)
        return f"{u.scheme}://{u.netloc.lower()}"