File size: 10,952 Bytes
33cfa2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
"""File caching service"""
import os
import asyncio
import hashlib
import time
from pathlib import Path
from typing import Optional
from datetime import datetime, timedelta
from curl_cffi.requests import AsyncSession
from ..core.config import config
from ..core.logger import debug_logger


class FileCache:
    """File caching service for videos"""

    def __init__(self, cache_dir: str = "tmp", default_timeout: int = 7200, proxy_manager=None):
        """
        Initialize file cache

        Args:
            cache_dir: Cache directory path
            default_timeout: Default cache timeout in seconds (default: 2 hours)
            proxy_manager: ProxyManager instance for downloading files
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)
        self.default_timeout = default_timeout
        self.proxy_manager = proxy_manager
        self._cleanup_task = None

    async def start_cleanup_task(self):
        """Start background cleanup task"""
        if self._cleanup_task is None:
            self._cleanup_task = asyncio.create_task(self._cleanup_loop())

    async def stop_cleanup_task(self):
        """Stop background cleanup task"""
        if self._cleanup_task:
            self._cleanup_task.cancel()
            try:
                await self._cleanup_task
            except asyncio.CancelledError:
                pass
            self._cleanup_task = None

    async def _cleanup_loop(self):
        """Background task to clean up expired files"""
        while True:
            try:
                await asyncio.sleep(300)  # Check every 5 minutes
                await self._cleanup_expired_files()
            except asyncio.CancelledError:
                break
            except Exception as e:
                debug_logger.log_error(
                    error_message=f"Cleanup task error: {str(e)}",
                    status_code=0,
                    response_text=""
                )

    async def _cleanup_expired_files(self):
        """Remove expired cache files"""
        try:
            current_time = time.time()
            removed_count = 0

            for file_path in self.cache_dir.iterdir():
                if file_path.is_file():
                    # Check file age
                    file_age = current_time - file_path.stat().st_mtime
                    if file_age > self.default_timeout:
                        try:
                            file_path.unlink()
                            removed_count += 1
                        except Exception:
                            pass

            if removed_count > 0:
                debug_logger.log_info(f"Cleanup: removed {removed_count} expired cache files")

        except Exception as e:
            debug_logger.log_error(
                error_message=f"Failed to cleanup expired files: {str(e)}",
                status_code=0,
                response_text=""
            )

    def _generate_cache_filename(self, url: str, media_type: str) -> str:
        """Generate unique filename for cached file"""
        # Use URL hash as filename
        url_hash = hashlib.md5(url.encode()).hexdigest()

        # Determine file extension
        if media_type == "video":
            ext = ".mp4"
        elif media_type == "image":
            ext = ".jpg"
        else:
            ext = ""

        return f"{url_hash}{ext}"

    async def download_and_cache(self, url: str, media_type: str) -> str:
        """
        Download file from URL and cache it locally

        Args:
            url: File URL to download
            media_type: 'image' or 'video'

        Returns:
            Local cache filename
        """
        filename = self._generate_cache_filename(url, media_type)
        file_path = self.cache_dir / filename

        # Check if already cached and not expired
        if file_path.exists():
            file_age = time.time() - file_path.stat().st_mtime
            if file_age < self.default_timeout:
                debug_logger.log_info(f"Cache hit: {filename}")
                return filename
            else:
                # Remove expired file
                try:
                    file_path.unlink()
                except Exception:
                    pass

        # Download file
        debug_logger.log_info(f"Downloading file from: {url}")

        # Get proxy if available
        proxy_url = None
        if self.proxy_manager:
            proxy_config = await self.proxy_manager.get_proxy_config()
            if proxy_config and proxy_config.enabled and proxy_config.proxy_url:
                proxy_url = proxy_config.proxy_url

        # Try method 1: curl_cffi with browser impersonation
        try:
            async with AsyncSession() as session:
                proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
                headers = {
                    "Accept": "*/*",
                    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                    "Accept-Encoding": "gzip, deflate, br",
                    "Connection": "keep-alive",
                    "Sec-Fetch-Dest": "document",
                    "Sec-Fetch-Mode": "navigate",
                    "Sec-Fetch-Site": "none",
                    "Upgrade-Insecure-Requests": "1"
                }
                response = await session.get(
                    url,
                    timeout=60,
                    proxies=proxies,
                    headers=headers,
                    impersonate="chrome120",
                    verify=False
                )

                if response.status_code == 200:
                    with open(file_path, 'wb') as f:
                        f.write(response.content)
                    debug_logger.log_info(f"File cached (curl_cffi): {filename} ({len(response.content)} bytes)")
                    return filename
                else:
                    debug_logger.log_warning(f"curl_cffi failed with HTTP {response.status_code}, trying wget...")

        except Exception as e:
            debug_logger.log_warning(f"curl_cffi failed: {str(e)}, trying wget...")

        # Try method 2: wget command
        try:
            import subprocess

            wget_cmd = [
                "wget",
                "-q",  # Quiet mode
                "-O", str(file_path),  # Output file
                "--timeout=60",
                "--tries=3",
                "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "--header=Accept: */*",
                "--header=Accept-Language: zh-CN,zh;q=0.9,en;q=0.8",
                "--header=Connection: keep-alive"
            ]

            # Add proxy if configured
            if proxy_url:
                # wget uses environment variables for proxy
                env = os.environ.copy()
                env['http_proxy'] = proxy_url
                env['https_proxy'] = proxy_url
            else:
                env = None

            # Add URL
            wget_cmd.append(url)

            # Execute wget
            result = subprocess.run(wget_cmd, capture_output=True, timeout=90, env=env)

            if result.returncode == 0 and file_path.exists():
                file_size = file_path.stat().st_size
                if file_size > 0:
                    debug_logger.log_info(f"File cached (wget): {filename} ({file_size} bytes)")
                    return filename
                else:
                    raise Exception("Downloaded file is empty")
            else:
                error_msg = result.stderr.decode('utf-8', errors='ignore') if result.stderr else "Unknown error"
                debug_logger.log_warning(f"wget failed: {error_msg}, trying curl...")

        except FileNotFoundError:
            debug_logger.log_warning("wget not found, trying curl...")
        except Exception as e:
            debug_logger.log_warning(f"wget failed: {str(e)}, trying curl...")

        # Try method 3: system curl command
        try:
            import subprocess

            curl_cmd = [
                "curl",
                "-L",  # Follow redirects
                "-s",  # Silent mode
                "-o", str(file_path),  # Output file
                "--max-time", "60",
                "-H", "Accept: */*",
                "-H", "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8",
                "-H", "Connection: keep-alive",
                "-A", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            ]

            # Add proxy if configured
            if proxy_url:
                curl_cmd.extend(["-x", proxy_url])

            # Add URL
            curl_cmd.append(url)

            # Execute curl
            result = subprocess.run(curl_cmd, capture_output=True, timeout=90)

            if result.returncode == 0 and file_path.exists():
                file_size = file_path.stat().st_size
                if file_size > 0:
                    debug_logger.log_info(f"File cached (curl): {filename} ({file_size} bytes)")
                    return filename
                else:
                    raise Exception("Downloaded file is empty")
            else:
                error_msg = result.stderr.decode('utf-8', errors='ignore') if result.stderr else "Unknown error"
                raise Exception(f"curl command failed: {error_msg}")

        except Exception as e:
            debug_logger.log_error(
                error_message=f"Failed to download file: {str(e)}",
                status_code=0,
                response_text=str(e)
            )
            raise Exception(f"Failed to cache file: {str(e)}")

    def get_cache_path(self, filename: str) -> Path:
        """Get full path to cached file"""
        return self.cache_dir / filename

    def set_timeout(self, timeout: int):
        """Set cache timeout in seconds"""
        self.default_timeout = timeout
        debug_logger.log_info(f"Cache timeout updated to {timeout} seconds")

    def get_timeout(self) -> int:
        """Get current cache timeout"""
        return self.default_timeout

    async def clear_all(self):
        """Clear all cached files"""
        try:
            removed_count = 0
            for file_path in self.cache_dir.iterdir():
                if file_path.is_file():
                    try:
                        file_path.unlink()
                        removed_count += 1
                    except Exception:
                        pass

            debug_logger.log_info(f"Cache cleared: removed {removed_count} files")
            return removed_count

        except Exception as e:
            debug_logger.log_error(
                error_message=f"Failed to clear cache: {str(e)}",
                status_code=0,
                response_text=""
            )
            raise