File size: 3,524 Bytes
408c946
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import requests
from config import CONTENT_EXTRACTION, SEARCH_SELECTION
from src.core.web_loader import web_loader

class BrowserEngine:
    """Fetch web pages and search results through a content-reader proxy API.

    All outbound requests carry browser-like headers whose identity values
    (IPs, user agent, origin, referrer, locale) are drawn from ``web_loader``,
    so traffic resembles an ordinary browser session.
    """

    def __init__(self, configuration):
        # Configuration exposes: content_reader_api, baidu_endpoint,
        # searxng_endpoint, request_timeout (shapes assumed from usage below).
        self.config = configuration

    def generate_headers(self):
        """Build the spoofed browser header dict for one request.

        Returns:
            dict: HTTP headers including forged forwarding/IP headers and
            locale hints derived from the ``web_loader`` location record.
        """
        # NOTE: the six web_loader getters are invoked in the same order as
        # always, in case the loader rotates state between calls.
        addr_v4 = web_loader.get_ipv4()
        addr_v6 = web_loader.get_ipv6()
        agent = web_loader.get_user_agent()
        page_origin = web_loader.get_origin()
        page_referrer = web_loader.get_referrer()
        geo = web_loader.get_location()

        # Origin with the scheme stripped, used as the forwarded host.
        bare_host = page_origin.replace("https://", "").replace("http://", "")

        spoofed = {
            "User-Agent": agent,
            "X-Forwarded-For": f"{addr_v4}, {addr_v6}",
            "X-Real-IP": addr_v4,
            "X-Originating-IP": addr_v4,
            "X-Remote-IP": addr_v4,
            "X-Remote-Addr": addr_v4,
            "X-Client-IP": addr_v4,
            "X-Forwarded-Host": bare_host,
            "Origin": page_origin,
            "Referer": page_referrer,
            "Accept-Language": f"{geo['language']},en;q=0.9",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "cross-site",
            "Sec-Fetch-User": "?1",
            "Cache-Control": "max-age=0",
            "X-Country": geo['country'],
            "X-Timezone": geo['timezone'],
        }
        return spoofed

    def extract_page_content(self, target_url: str) -> str:
        """POST ``target_url`` to the reader API and return the extracted text.

        On any failure (network, HTTP error status, header generation) a
        human-readable error string is returned instead of raising.
        """
        try:
            response = requests.post(
                self.config.content_reader_api,
                data={"url": target_url},
                headers=self.generate_headers(),
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
            # Append the post-extraction instruction block for the consumer.
            return f"{response.text}{CONTENT_EXTRACTION}"
        except Exception as error:
            # Best-effort contract: callers receive an error string, never an exception.
            return f"Error reading URL: {str(error)}"

    def perform_search(self, search_query: str, search_provider: str = "google") -> str:
        """Run a web search via the reader API and return the raw result markup.

        Args:
            search_query: Free-text query.
            search_provider: "baidu" hits the Baidu endpoint directly; any
                other value goes through SearXNG, with "google" selecting the
                ``!go`` bang and everything else falling back to ``!bi`` (Bing).
        """
        try:
            request_headers = self.generate_headers()
            api_root = self.config.content_reader_api

            if search_provider == "baidu":
                # Baidu results live under the #content_left container.
                request_headers["X-Target-Selector"] = "#content_left"
                destination = f"{api_root}{self.config.baidu_endpoint}?wd={requests.utils.quote(search_query)}"
            else:
                # SearXNG bang syntax routes the query to the chosen engine.
                bang = "!go" if search_provider == "google" else "!bi"
                encoded = requests.utils.quote(f"{bang} {search_query}")
                request_headers["X-Target-Selector"] = "#urls"
                destination = f"{api_root}{self.config.searxng_endpoint}?q={encoded}"

            response = requests.get(
                destination,
                headers=request_headers,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
            # Append the result-selection instruction block for the consumer.
            return f"{response.text}{SEARCH_SELECTION}"
        except Exception as error:
            # Best-effort contract: callers receive an error string, never an exception.
            return f"Error during search: {str(error)}"