# Provenance (Hugging Face file-page residue): uploaded by fagun18 using
# huggingface_hub, commit 2b89d68 (verified), file size 5.38 kB.
"""
🤖 Fagun Browser Automation Testing Agent - Site Audit
======================================================
Site-wide intelligent audit: crawl pages, detect broken links, collect console/network errors,
run form tests where applicable, and generate an aggregated report.
"""
import asyncio
import logging
from typing import Any, Dict, List, Set, Tuple
from urllib.parse import urljoin, urlparse
from playwright.async_api import Page
from src.utils.intelligent_form_testing import IntelligentFormTester
# Module-level logger; SiteAuditor.audit reports per-page navigation failures through it.
logger = logging.getLogger(__name__)
class SiteAuditor:
    """Crawl a single origin breadth-first and aggregate per-page findings.

    For every visited page the auditor records the title, console/page errors,
    failed network requests, broken same-origin links and, when the page
    contains a ``<form>``, the results of the intelligent form tester.
    """

    # Pause after DOMContentLoaded so late scripts can run before inspection.
    SETTLE_DELAY_S = 0.5
    # Upper bound on the anchors read from a single page.
    MAX_ANCHORS_PER_PAGE = 200
    # Upper bound on the links probed per page, to keep runtime in bounds.
    MAX_LINK_CHECKS_PER_PAGE = 50
    # Synthetic status reported when a probe fails without any HTTP response.
    UNREACHABLE_STATUS = 599

    def __init__(self, page: "Page", form_tester_factory):
        """Create an auditor bound to one page.

        Args:
            page: a Playwright Page bound to a BrowserContext.
            form_tester_factory: callable that returns an IntelligentFormTester
                for a given page.
        """
        self.page = page
        self.form_tester_factory = form_tester_factory
        # Buffers filled by the page event listeners while audit() is running.
        self._console_errors: List[str] = []
        self._network_issues: List[Dict[str, str]] = []

    async def audit(self, start_url: str, max_pages: int = 10, max_depth: int = 2) -> Dict[str, Any]:
        """Crawl same-origin pages starting at ``start_url`` and report findings.

        Args:
            start_url: first page to visit; its scheme and host define the origin.
            max_pages: maximum number of pages to visit (failed visits count too).
            max_depth: maximum link distance from ``start_url``.

        Returns:
            A dict with ``start_url``, ``total_pages_visited``, ``pages`` (one
            entry per visited page, or ``{"url", "error"}`` when the visit
            failed) and ``broken_links`` (``href``/``status``/``on_page``).
        """
        origin = self._origin(start_url)
        first_url = self._strip_fragment(start_url)
        visited: Set[str] = set()
        # Every URL is enqueued at most once; BFS order guarantees that the
        # first time a URL is seen is also at its smallest depth.
        queued: Set[str] = {first_url}
        queue: List[Tuple[str, int]] = [(first_url, 0)]
        cursor = 0  # index of the next queue entry; avoids O(n) list.pop(0)
        pages_summary: List[Dict[str, Any]] = []
        broken_links: List[Dict[str, str]] = []
        # Probe each distinct link only once per audit, however many pages
        # reference it (navigation links appear on nearly every page).
        status_cache: Dict[str, int] = {}

        listeners = (
            ("console", self._on_console),
            ("pageerror", self._on_page_error),
            ("requestfailed", self._on_request_failed),
        )
        for event, handler in listeners:
            self.page.on(event, handler)
        try:
            while cursor < len(queue) and len(visited) < max_pages:
                url, depth = queue[cursor]
                cursor += 1
                if depth > max_depth:
                    continue
                visited.add(url)
                try:
                    page_result, next_links = await self._audit_page(
                        url, origin, status_cache, broken_links
                    )
                except Exception as e:
                    logger.warning("Audit navigation error at %s: %s", url, e)
                    pages_summary.append({"url": url, "error": str(e)})
                    continue
                pages_summary.append(page_result)
                if depth < max_depth:
                    for link in next_links:
                        if link not in queued:
                            queued.add(link)
                            queue.append((link, depth + 1))
        finally:
            # Never leave our listeners attached to a page we do not own.
            for event, handler in listeners:
                self.page.remove_listener(event, handler)

        return {
            "start_url": start_url,
            "total_pages_visited": len(visited),
            "pages": pages_summary,
            "broken_links": broken_links,
        }

    async def _audit_page(
        self,
        url: str,
        origin: str,
        status_cache: Dict[str, int],
        broken_links: List[Dict[str, str]],
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Visit one page; return its result and its unique same-origin links.

        Broken links are appended to ``broken_links`` in place so that they
        survive even if a later step on the same page raises.
        """
        # Drop events buffered while the previous page was being tested.
        self._console_errors.clear()
        self._network_issues.clear()

        await self.page.goto(url, wait_until="domcontentloaded")
        await asyncio.sleep(self.SETTLE_DELAY_S)

        # Snapshot load-time errors now, before form testing adds its own noise.
        page_result: Dict[str, Any] = {
            "url": url,
            "title": await self.page.title(),
            "console_errors": await self._collect_console_errors(),
            "network_issues": list(self._network_issues),
        }

        links = await self._extract_links()
        # dict.fromkeys de-duplicates while preserving document order.
        same_origin_links = list(
            dict.fromkeys(link for link in links if self._origin(link) == origin)
        )
        broken_links.extend(
            await self._find_broken_links(
                same_origin_links[: self.MAX_LINK_CHECKS_PER_PAGE], url, status_cache
            )
        )

        if (await self.page.locator("form").count()) > 0:
            await self._run_form_tests(page_result)

        return page_result, same_origin_links

    async def _find_broken_links(
        self, links: List[str], on_page: str, status_cache: Dict[str, int]
    ) -> List[Dict[str, str]]:
        """Return an entry for every link in ``links`` whose status is >= 400."""
        broken: List[Dict[str, str]] = []
        for link in links:
            if link not in status_cache:
                status_cache[link] = await self._head_status(link)
            status = status_cache[link]
            if status >= 400:
                broken.append({"href": link, "status": str(status), "on_page": on_page})
        return broken

    async def _run_form_tests(self, page_result: Dict[str, Any]) -> None:
        """Run the form tester on the current page and store its reports.

        Any failure, including a failing factory, is recorded under
        ``form_testing_error`` instead of discarding the page result.
        """
        try:
            tester: IntelligentFormTester = self.form_tester_factory(self.page)
            await tester.discover_form_fields()
            scenarios = await tester.generate_test_scenarios()
            await tester.execute_test_scenarios(scenarios)
            form_report = await tester.generate_comprehensive_report()
            # Add basic accessibility checks for the page
            a11y = await tester.run_basic_accessibility_checks()
            page_result["form_testing"] = form_report
            page_result["accessibility"] = a11y
        except Exception as e:
            page_result["form_testing_error"] = str(e)

    async def _extract_links(self) -> List[str]:
        """Return absolute, fragment-free URLs of the anchors on the current page."""
        anchors = await self.page.locator("a[href]").all()
        base = self.page.url
        urls: List[str] = []
        for anchor in anchors[: self.MAX_ANCHORS_PER_PAGE]:
            try:
                href = ((await anchor.get_attribute("href")) or "").strip()
                if href:
                    # "#section" variants point at the same document, so they
                    # must not be crawled or probed as separate URLs.
                    urls.append(self._strip_fragment(urljoin(base, href)))
            except Exception:
                # Detached element or unparsable href: skip just this anchor.
                continue
        return urls

    async def _head_status(self, url: str) -> int:
        """Return the HTTP status for ``url`` (``UNREACHABLE_STATUS`` on failure).

        A HEAD request is tried first so that large downloads are not fetched.
        Servers that mishandle HEAD get a second chance with GET, so a link is
        only reported as broken when GET fails too.
        """
        request = self.page.context.request
        status = await self._probe(request.head, url)
        if status >= 400:
            status = await self._probe(request.get, url)
        return status

    async def _probe(self, send, url: str) -> int:
        """Issue one request with ``send`` and return its status code."""
        try:
            resp = await send(url, max_redirects=2)
        except Exception:
            return self.UNREACHABLE_STATUS
        status = resp.status
        try:
            # Undisposed response bodies stay in memory until the context closes.
            await resp.dispose()
        except Exception:
            logger.debug("Could not dispose response for %s", url, exc_info=True)
        return status

    async def _collect_console_errors(self) -> List[str]:
        """Return visible error banners followed by captured console errors.

        The DOM part is a heuristic (ARIA alerts and common error classes); the
        console part comes from the listeners that ``audit`` attaches.
        """
        errors: List[str] = []
        try:
            nodes = await self.page.locator(".error, .alert-danger, [role='alert']").all()
        except Exception:
            logger.debug("Could not query error containers", exc_info=True)
            nodes = []
        for node in nodes:
            try:
                txt = await node.text_content()
            except Exception:
                continue
            txt = (txt or "").strip()
            if txt:
                errors.append(txt)
        errors.extend(self._console_errors)
        return errors

    def _on_console(self, message) -> None:
        """Buffer console messages of type ``error``."""
        if message.type == "error":
            self._console_errors.append(message.text)

    def _on_page_error(self, error) -> None:
        """Buffer uncaught exceptions raised inside the page."""
        self._console_errors.append(str(error))

    def _on_request_failed(self, request) -> None:
        """Buffer requests that failed at the network level."""
        self._network_issues.append(
            {"url": request.url, "error": request.failure or "unknown"}
        )

    @staticmethod
    def _strip_fragment(url: str) -> str:
        """Return ``url`` without its ``#fragment`` part."""
        return urlparse(url)._replace(fragment="").geturl()

    def _origin(self, url: str) -> str:
        """Return the ``scheme://netloc`` part of ``url``."""
        u = urlparse(url)
        return f"{u.scheme}://{u.netloc}"