File size: 9,056 Bytes
33cfa2a 4b5def4 33cfa2a 4b5def4 33cfa2a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
import asyncio
import time
import re
import os
from typing import Optional, Dict
from ..core.logger import debug_logger
# Conditionally import playwright
try:
from playwright.async_api import async_playwright, BrowserContext, Page
PLAYWRIGHT_AVAILABLE = True
except ImportError:
PLAYWRIGHT_AVAILABLE = False
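# Proxy URL helpers. NOTE(review): the original module reportedly also defined validate_browser_proxy_url; it does not appear in this listing - confirm before relying on it.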
def parse_proxy_url(proxy_url: str) -> Optional[Dict[str, str]]:
    """Split a proxy URL into the dict shape used for the Playwright ``proxy`` option.

    Accepted form: ``scheme://[username:password@]host:port`` where ``scheme``
    is ``socks5``, ``http`` or ``https``.

    Args:
        proxy_url: The proxy URL. Leading and trailing whitespace is ignored.

    Returns:
        ``{'server': 'scheme://host:port'}``, plus ``'username'`` and
        ``'password'`` keys when credentials are present, or ``None`` when the
        value is not a string or does not match the accepted form.
    """
    if not isinstance(proxy_url, str):
        return None

    # The password group is greedy so that it extends to the LAST '@', which
    # lets passwords contain '@'. The host group excludes '@' and '/' so that
    # part of the credentials can never be mistaken for the host name.
    proxy_pattern = r'(socks5|http|https)://(?:([^:]+):(.+)@)?([^:@/]+):(\d+)'
    match = re.fullmatch(proxy_pattern, proxy_url.strip())
    if match is None:
        return None

    protocol, username, password, host, port = match.groups()
    proxy_config = {'server': f'{protocol}://{host}:{port}'}
    if username and password:
        proxy_config['username'] = username
        proxy_config['password'] = password
    return proxy_config
class BrowserCaptchaService:
    """Obtain reCAPTCHA tokens through a persistent, headed Chromium context.

    The service launches Chromium with ``launch_persistent_context`` against a
    ``browser_data`` directory under the current working directory, so cookies
    (for example a manual Google login) are reused by later runs. Each token
    request opens a new tab in that context and closes only the tab afterwards.

    Use ``await BrowserCaptchaService.get_instance(db)`` to obtain the shared
    instance; the browser itself is started lazily by ``get_token``.
    """

    _instance: Optional['BrowserCaptchaService'] = None
    _lock = asyncio.Lock()

    def __init__(self, db=None):
        """Set up the service state without starting a browser.

        Args:
            db: Optional database handle. When given, ``initialize`` awaits
                ``db.get_captcha_config()`` to read the browser proxy settings.
        """
        # Headed mode: a visible window is needed for manual login.
        self.headless = False
        self.playwright = None
        # In persistent mode we drive a BrowserContext directly; there is no
        # separate Browser object.
        self.context: Optional[BrowserContext] = None
        self._initialized = False
        self.website_key = "6LdsFiUsAAAAAIjVDZcuLhaHiDn5nnHVXVRQGeMV"
        self.db = db
        # Profile directory created under the working directory; it holds the
        # browser state (including the login session) between runs.
        self.user_data_dir = os.path.join(os.getcwd(), "browser_data")

    @classmethod
    async def get_instance(cls, db=None) -> 'BrowserCaptchaService':
        """Return the shared service instance, creating it on first use.

        Args:
            db: Passed to the constructor only when the instance is created;
                ignored on later calls.

        Returns:
            The single ``BrowserCaptchaService`` instance.
        """
        if cls._instance is None:
            async with cls._lock:
                # Re-check under the lock: another task may have created the
                # instance while this one was waiting.
                if cls._instance is None:
                    cls._instance = cls(db)
                    # The browser is not started here; get_token() does it lazily.
        return cls._instance

    async def initialize(self):
        """Launch the persistent browser context if it is not already running.

        Raises:
            ImportError: If Playwright is not installed.
            Exception: Any error raised while reading the proxy configuration
                or launching the browser is logged and re-raised. Whatever was
                partially started is shut down first so no driver process leaks.
        """
        if self._initialized and self.context:
            return

        try:
            if not PLAYWRIGHT_AVAILABLE:
                debug_logger.log_error("[BrowserCaptcha] ❌ Playwright 不可用,请使用 YesCaptcha 服务")
                raise ImportError("Playwright 未安装,请使用 YesCaptcha 服务")

            proxy_url = None
            if self.db:
                captcha_config = await self.db.get_captcha_config()
                if captcha_config.browser_proxy_enabled and captcha_config.browser_proxy_url:
                    proxy_url = captcha_config.browser_proxy_url

            debug_logger.log_info(f"[BrowserCaptcha] 正在启动浏览器 (用户数据目录: {self.user_data_dir})...")
            self.playwright = await async_playwright().start()

            launch_options = {
                'headless': self.headless,
                'user_data_dir': self.user_data_dir,  # persistent profile directory
                'viewport': {'width': 1280, 'height': 720},  # default window size
                'args': [
                    '--disable-blink-features=AutomationControlled',
                    '--disable-infobars',
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                ],
            }

            if proxy_url:
                proxy_config = parse_proxy_url(proxy_url)
                if proxy_config:
                    launch_options['proxy'] = proxy_config
                    debug_logger.log_info(f"[BrowserCaptcha] 使用代理: {proxy_config['server']}")
                else:
                    # Make the fallback to a direct connection visible. The raw
                    # URL is not logged because it may contain credentials.
                    debug_logger.log_warning("[BrowserCaptcha] 代理地址格式无效,已忽略代理配置")

            # launch_persistent_context opens a browser window backed by the
            # on-disk profile instead of a throwaway context.
            self.context = await self.playwright.chromium.launch_persistent_context(**launch_options)
            self.context.set_default_timeout(30000)

            self._initialized = True
            debug_logger.log_info(f"[BrowserCaptcha] ✅ 浏览器已启动 (Profile: {self.user_data_dir})")
        except Exception as e:
            debug_logger.log_error(f"[BrowserCaptcha] ❌ 浏览器启动失败: {str(e)}")
            # Release anything that was started before the failure, then
            # re-raise the original error to the caller.
            await self._shutdown_browser()
            raise

    async def get_token(self, project_id: str) -> Optional[str]:
        """Open the project page in a new tab and run reCAPTCHA to get a token.

        Args:
            project_id: Identifier inserted into the Flow project URL.

        Returns:
            The token string, or ``None`` when the token could not be obtained
            or an error occurred after the browser was started.

        Raises:
            Exception: Errors from ``initialize`` propagate, because the lazy
                start happens before the guarded section.
        """
        if not self._initialized or not self.context:
            await self.initialize()

        page: Optional[Page] = None
        try:
            # A new tab in the existing context reuses the cookies stored in
            # the persistent profile.
            page = await self.context.new_page()

            website_url = f"https://labs.google/fx/tools/flow/project/{project_id}"
            debug_logger.log_info(f"[BrowserCaptcha] 访问页面: {website_url}")

            try:
                await page.goto(website_url, wait_until="domcontentloaded")
            except Exception as e:
                # Navigation problems are logged only; the token attempt below
                # still runs against whatever did load.
                debug_logger.log_warning(f"[BrowserCaptcha] 页面加载警告: {str(e)}")

            # Hook for manual intervention: a pause (e.g. asyncio.sleep) could
            # be added here on first run or when the page is not logged in.

            # NOTE(review): the source marks the injection and wait logic below
            # as a shortened stand-in for the original, fuller implementation.
            script_loaded = await page.evaluate("() => { return !!(window.grecaptcha && window.grecaptcha.execute); }")
            if not script_loaded:
                await page.evaluate(f"""
                    () => {{
                        const script = document.createElement('script');
                        script.src = 'https://www.google.com/recaptcha/api.js?render={self.website_key}';
                        script.async = true; script.defer = true;
                        document.head.appendChild(script);
                    }}
                """)
                # Fixed delay to give the injected script time to load.
                await page.wait_for_timeout(2000)

            # A failure inside the page is turned into null by the JS try/catch.
            token = await page.evaluate(f"""
                async () => {{
                    try {{
                        return await window.grecaptcha.execute('{self.website_key}', {{ action: 'FLOW_GENERATION' }});
                    }} catch (e) {{ return null; }}
                }}
            """)

            if token:
                debug_logger.log_info(f"[BrowserCaptcha] ✅ Token获取成功")
                return token

            debug_logger.log_error("[BrowserCaptcha] Token获取失败")
            return None
        except Exception as e:
            debug_logger.log_error(f"[BrowserCaptcha] 异常: {str(e)}")
            return None
        finally:
            # Close only the tab; the context (browser window) stays open.
            if page is not None:
                try:
                    await page.close()
                except Exception as e:
                    # Best-effort cleanup. Catching Exception rather than using
                    # a bare except lets task cancellation propagate.
                    debug_logger.log_warning(f"[BrowserCaptcha] 页面关闭警告: {str(e)}")

    async def _shutdown_browser(self) -> bool:
        """Close the context and stop Playwright, each step independently.

        A failure in one step is logged and does not prevent the other step.
        The instance state is always reset so a later ``initialize`` starts clean.

        Returns:
            ``True`` when every step that ran succeeded, ``False`` otherwise.
        """
        clean = True

        context, self.context = self.context, None
        if context:
            try:
                await context.close()  # closes the whole browser window
            except Exception as e:
                clean = False
                debug_logger.log_error(f"[BrowserCaptcha] 关闭异常: {str(e)}")

        playwright, self.playwright = self.playwright, None
        if playwright:
            try:
                await playwright.stop()
            except Exception as e:
                clean = False
                debug_logger.log_error(f"[BrowserCaptcha] 关闭异常: {str(e)}")

        self._initialized = False
        return clean

    async def close(self):
        """Shut the browser down completely; call this when releasing resources.

        Errors are logged and never raised.
        """
        if await self._shutdown_browser():
            debug_logger.log_info("[BrowserCaptcha] 浏览器服务已关闭")

    async def open_login_window(self):
        """Open the Google sign-in page in the persistent browser for a manual login.

        The tab is deliberately left open so the user can finish signing in.
        """
        await self.initialize()
        page = await self.context.new_page()
        await page.goto("https://accounts.google.com/")
        print("请在打开的浏览器中登录账号。登录完成后,无需关闭浏览器,脚本下次运行时会自动使用此状态。")