#
# SPDX-FileCopyrightText: Hadad
# SPDX-License-Identifier: Apache-2.0
#

import requests
from config import CONTENT_EXTRACTION, SEARCH_SELECTION
from src.core.web_loader import web_loader


class BrowserEngine:
    """HTTP client that fetches page content and search results.

    All requests go through the reader service whose base URL is stored in
    ``configuration.content_reader_api``. The configuration object must also
    provide ``request_timeout``, ``baidu_endpoint`` and ``searxng_endpoint``.
    """

    def __init__(self, configuration):
        """Keep a reference to the configuration object on ``self.config``."""
        self.config = configuration

    def generate_headers(self):
        """Assemble the header dictionary sent with every outgoing request.

        The IP addresses, user agent, origin, referrer and location values
        are obtained from ``web_loader``; the remaining headers are fixed.

        Returns:
            A dict mapping header names to string values.
        """
        client_v4 = web_loader.get_ipv4()
        client_v6 = web_loader.get_ipv6()
        agent = web_loader.get_user_agent()
        site_origin = web_loader.get_origin()
        referring_page = web_loader.get_referrer()
        locale = web_loader.get_location()

        headers = {
            "User-Agent": agent,
            "X-Forwarded-For": f"{client_v4}, {client_v6}",
        }

        # Every one of these client-address headers carries the IPv4 value.
        ipv4_only_names = (
            "X-Real-IP",
            "X-Originating-IP",
            "X-Remote-IP",
            "X-Remote-Addr",
            "X-Client-IP",
        )
        headers.update(dict.fromkeys(ipv4_only_names, client_v4))

        # The forwarded host is the origin with its URL scheme removed.
        bare_host = site_origin.replace("https://", "").replace("http://", "")

        headers.update(
            {
                "X-Forwarded-Host": bare_host,
                "Origin": site_origin,
                "Referer": referring_page,
                "Accept-Language": f"{locale['language']},en;q=0.9",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                # NOTE(review): advertising "br" presumably relies on a Brotli
                # decoder being installed for requests/urllib3 - confirm.
                "Accept-Encoding": "gzip, deflate, br",
                "DNT": "1",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
                "Sec-Fetch-Dest": "document",
                "Sec-Fetch-Mode": "navigate",
                "Sec-Fetch-Site": "cross-site",
                "Sec-Fetch-User": "?1",
                "Cache-Control": "max-age=0",
                "X-Country": locale['country'],
                "X-Timezone": locale['timezone'],
            }
        )
        return headers

    def extract_page_content(self, target_url: str) -> str:
        """Fetch the content of ``target_url`` through the reader service.

        The URL is sent as the ``url`` form field of a POST request to
        ``self.config.content_reader_api``.

        Args:
            target_url: Address of the page to read.

        Returns:
            The response body followed by ``CONTENT_EXTRACTION``. If anything
            raises (header generation, the request itself, or a non-success
            HTTP status), the string ``"Error reading URL: <message>"`` is
            returned instead of propagating the exception.
        """
        try:
            outbound_headers = self.generate_headers()
            reply = requests.post(
                self.config.content_reader_api,
                data={"url": target_url},
                headers=outbound_headers,
                timeout=self.config.request_timeout,
            )
            # Turn 4xx/5xx statuses into exceptions so they are reported below.
            reply.raise_for_status()
            page_text = reply.text
            return f"{page_text}{CONTENT_EXTRACTION}"
        except Exception as failure:
            return f"Error reading URL: {failure!s}"

    def perform_search(self, search_query: str, search_provider: str = "google") -> str:
        """Run a search through the reader service and return the raw result text.

        For ``"baidu"`` the query is passed as the ``wd`` parameter of the
        Baidu endpoint. Every other provider goes through the SearXNG
        endpoint, with the query prefixed by ``!go`` for ``"google"`` and
        ``!bi`` for anything else (presumably Bing - verify against callers).

        Args:
            search_query: Text to search for.
            search_provider: Provider name; defaults to ``"google"``.

        Returns:
            The response body followed by ``SEARCH_SELECTION``. If anything
            raises, the string ``"Error during search: <message>"`` is
            returned instead of propagating the exception.
        """
        try:
            outbound_headers = self.generate_headers()
            settings = self.config

            if search_provider == "baidu":
                endpoint_url = f"{settings.content_reader_api}{settings.baidu_endpoint}"
                full_url = f"{endpoint_url}?wd={requests.utils.quote(search_query)}"
                selector = "#content_left"
            else:
                bang = "!bi" if search_provider != "google" else "!go"
                quoted_terms = requests.utils.quote(f"{bang} {search_query}")
                full_url = (
                    f"{settings.content_reader_api}{settings.searxng_endpoint}"
                    f"?q={quoted_terms}"
                )
                selector = "#urls"

            # Sent as a request header alongside the generated ones.
            outbound_headers["X-Target-Selector"] = selector

            reply = requests.get(
                full_url,
                headers=outbound_headers,
                timeout=settings.request_timeout,
            )
            reply.raise_for_status()
            result_text = reply.text
            return f"{result_text}{SEARCH_SELECTION}"
        except Exception as failure:
            return f"Error during search: {failure!s}"