"""EDGAR API Client Module with Performance Optimization"""

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import urllib3
try:
    from sec_edgar_api.EdgarClient import EdgarClient
except ImportError:
    EdgarClient = None
import time
import threading
from functools import lru_cache
import re
import difflib

# Suppress urllib3 InsecureRequestWarning (emitted for unverified HTTPS requests)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class EdgarDataClient:
    # Class-level cache for company_tickers.json (shared across instances)
    _company_tickers_cache = None
    _company_tickers_cache_time = None
    _company_tickers_cache_ttl = 3600  # 1 hour TTL
    _cache_lock = threading.Lock()
    
    # Class-level rate limiter (SEC requires max 10 requests per second)
    _last_request_time = 0
    _rate_limit_lock = threading.Lock()
    _min_request_interval = 0.11  # 110ms between requests (9 req/sec, safe margin)
    
    # Company indexes (speed up search; avoid scanning the full ticker list on every query)
    _by_ticker = None  # ticker -> company info
    _by_title = None  # title (lowercase) -> company info
    _by_title_norm = None  # normalized title -> company info
    _all_keys = None  # list of all keys, used for fuzzy matching
    _index_built_time = None
    _index_ttl = 3600  # 1 hour
    
    # Common alias map (maps informal names to official company titles)
    _alias_map = {
        "google": "alphabet inc",
        "alphabet": "alphabet inc",
        "facebook": "meta platforms, inc.",
        "meta": "meta platforms, inc.",
        "amazon": "amazon.com, inc.",
        "apple": "apple inc.",
        "microsoft": "microsoft corporation",
        "netflix": "netflix, inc.",
        "nvidia": "nvidia corporation",
        "tesla": "tesla, inc.",
        "adobe": "adobe inc.",
        "oracle": "oracle corporation",
        "ibm": "international business machines corporation",
        "paypal": "paypal holdings, inc.",
        "shopify": "shopify inc.",
    }
    
    def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
        """Initialize EDGAR client with connection pooling and timeout"""
        self.user_agent = user_agent
        
        # Instance-level search cache (cuts repeated-search overhead further)
        self._search_cache = {}
        
        # Configure requests session with connection pooling
        self.session = requests.Session()
        
        # Configure a retry strategy (extra retries for stability)
        retry_strategy = Retry(
            total=5,  # Increased from 3 to 5 for better reliability
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"]
        )
        
        adapter = HTTPAdapter(
            pool_connections=10,
            pool_maxsize=20,
            max_retries=retry_strategy,
            pool_block=False
        )
        
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)
        
        # Set default timeout with connection and read timeouts
        self.timeout = (10, 30)  # (connect timeout, read timeout)
        
        # Initialize sec_edgar_api client with timeout wrapper
        if EdgarClient:
            self.edgar = EdgarClient(user_agent=user_agent)
            # Monkey patch to add timeout
            self._patch_edgar_client_timeout()
        else:
            self.edgar = None
    
    def _patch_edgar_client_timeout(self):
        """Monkey patch sec_edgar_api to add timeout support"""
        if not self.edgar:
            return
        
        def with_timeout(func):
            """Thread-based timeout wrapper (Gradio compatible, unlike signal-based timeouts)"""
            def wrapped(cik):
                result = [None]
                exception = [None]
                
                def worker():
                    try:
                        result[0] = func(cik)
                    except Exception as e:
                        exception[0] = e
                
                thread = threading.Thread(target=worker, daemon=True)
                thread.start()
                # Use the read timeout value (second element of the timeout tuple)
                timeout_seconds = self.timeout[1] if isinstance(self.timeout, tuple) else self.timeout
                thread.join(timeout=timeout_seconds)
                
                if thread.is_alive():
                    raise TimeoutError(f"SEC API request timeout ({timeout_seconds}s)")
                
                if exception[0]:
                    raise exception[0]
                
                return result[0]
            return wrapped
        
        # Wrap get_submissions and get_company_facts with the same timeout logic
        self.edgar.get_submissions = with_timeout(self.edgar.get_submissions)
        self.edgar.get_company_facts = with_timeout(self.edgar.get_company_facts)
    
    def _rate_limit(self):
        """Thread-safe rate limiting to comply with SEC requirements"""
        with self._rate_limit_lock:
            current_time = time.time()
            time_since_last = current_time - EdgarDataClient._last_request_time
            
            if time_since_last < self._min_request_interval:
                sleep_time = self._min_request_interval - time_since_last
                time.sleep(sleep_time)
            
            EdgarDataClient._last_request_time = time.time()
    
    def _normalize_text(self, s: str) -> str:
        """规范化文本:用于提升匹配准确度"""
        if not s:
            return ""
        s = s.lower().strip()
        s = s.replace("&", " and ")
        s = re.sub(r"[.,()\-_/]", " ", s)
        s = re.sub(r"\s+", " ", s)
        # Drop common corporate suffixes (punctuation is already stripped above, so dotted forms like "inc." cannot occur)
        stopwords = {"inc", "incorporated", "corp", "corporation", "co", "company", "plc", "ltd", "llc", "the"}
        tokens = [t for t in s.split() if t not in stopwords]
        return " ".join(tokens).strip()
    
    def _ensure_company_index(self):
        """确保公司索引已构建(按需构建或过期重建)"""
        with self._cache_lock:
            current_time = time.time()
            
            # Refresh the company_tickers cache if it is missing or expired
            if (EdgarDataClient._company_tickers_cache is None or
                EdgarDataClient._company_tickers_cache_time is None or
                current_time - EdgarDataClient._company_tickers_cache_time >= self._company_tickers_cache_ttl):
                # Fetch and update the company_tickers cache
                self._rate_limit()
                url = "https://www.sec.gov/files/company_tickers.json"
                headers = {"User-Agent": self.user_agent}
                response = self.session.get(url, headers=headers, timeout=self.timeout)
                response.raise_for_status()
                companies = response.json()
                EdgarDataClient._company_tickers_cache = companies
                EdgarDataClient._company_tickers_cache_time = current_time
            else:
                companies = EdgarDataClient._company_tickers_cache
            
            # Rebuild the index if it is missing or expired
            if (EdgarDataClient._by_ticker is None or
                EdgarDataClient._by_title is None or
                EdgarDataClient._by_title_norm is None or
                EdgarDataClient._all_keys is None or
                EdgarDataClient._index_built_time is None or
                current_time - EdgarDataClient._index_built_time >= EdgarDataClient._index_ttl):
                
                by_ticker = {}
                by_title = {}
                by_title_norm = {}
                all_keys = []
                
                for _, company in companies.items():
                    title = company.get("title", "")
                    ticker = company.get("ticker", "")
                    cik_str = str(company.get("cik_str", "")).zfill(10)
                    
                    title_lower = title.lower()
                    ticker_lower = ticker.lower()
                    title_norm = self._normalize_text(title)
                    
                    # Index by ticker, raw title, and normalized title
                    if ticker_lower:
                        by_ticker[ticker_lower] = {"cik": cik_str, "name": title, "ticker": ticker}
                        all_keys.append(ticker_lower)
                    if title_lower:
                        by_title[title_lower] = {"cik": cik_str, "name": title, "ticker": ticker}
                    if title_norm:
                        by_title_norm[title_norm] = {"cik": cik_str, "name": title, "ticker": ticker}
                        all_keys.append(title_norm)
                
                EdgarDataClient._by_ticker = by_ticker
                EdgarDataClient._by_title = by_title
                EdgarDataClient._by_title_norm = by_title_norm
                EdgarDataClient._all_keys = all_keys
                EdgarDataClient._index_built_time = current_time
        
    def search_company_by_name(self, company_name):
        """Search company CIK by company name with caching and optimized ticker matching"""
        try:
            # Instance-level cache lookup (keyed by the normalized query)
            norm_query = self._normalize_text(company_name)
            cache_hit = self._search_cache.get(norm_query)
            if cache_hit:
                return cache_hit
            
            # Ensure the index is built (first use, or a rebuild after expiry)
            self._ensure_company_index()
            
            # Grab index references (already fully built under the lock)
            by_ticker = EdgarDataClient._by_ticker
            by_title = EdgarDataClient._by_title
            by_title_norm = EdgarDataClient._by_title_norm
            all_keys = EdgarDataClient._all_keys
            
            # ✅ OPTIMIZATION 1: Ticker-first matching (per project convention)
            raw = company_name.strip().lower()
            raw_compact = re.sub(r"[^a-z0-9]", "", raw)
            is_ticker_like = 1 <= len(raw_compact) <= 5
            
            if is_ticker_like and raw_compact in by_ticker:
                result = by_ticker[raw_compact]
                self._search_cache[norm_query] = result
                return result
            
            # ✅ OPTIMIZATION 2: Alias mapping (e.g. 'google' -> 'alphabet inc')
            alias_target = EdgarDataClient._alias_map.get(norm_query)
            if alias_target:
                alias_norm = self._normalize_text(alias_target)
                # Try the normalized title first
                if alias_norm in by_title_norm:
                    result = by_title_norm[alias_norm]
                    self._search_cache[norm_query] = result
                    return result
                # Then the raw title
                alias_lower = alias_target.lower()
                if alias_lower in by_title:
                    result = by_title[alias_lower]
                    self._search_cache[norm_query] = result
                    return result
                # Finally try it as a ticker (some aliases are actually tickers)
                alias_ticker = re.sub(r"[^a-z0-9]", "", alias_lower)
                if alias_ticker in by_ticker:
                    result = by_ticker[alias_ticker]
                    self._search_cache[norm_query] = result
                    return result
            
            # ✅ OPTIMIZATION 3: Exact match on the raw title
            title_lower = company_name.lower().strip()
            if title_lower in by_title:
                result = by_title[title_lower]
                self._search_cache[norm_query] = result
                return result
            
            # ✅ OPTIMIZATION 4: Exact match on the normalized title
            if norm_query in by_title_norm:
                result = by_title_norm[norm_query]
                self._search_cache[norm_query] = result
                return result
            
            # ✅ OPTIMIZATION 5: Exact ticker match (retry the raw input without the length check)
            if raw_compact in by_ticker:
                result = by_ticker[raw_compact]
                self._search_cache[norm_query] = result
                return result
            
            # ✅ OPTIMIZATION 6: Partial (substring) matching
            partial_matches = []
            for key in by_title_norm.keys():
                if norm_query in key:
                    partial_matches.append(key)
            if not partial_matches:
                for t in by_ticker.keys():
                    if norm_query in t:
                        partial_matches.append(t)
            if partial_matches:
                best_key = max(
                    partial_matches,
                    key=lambda k: difflib.SequenceMatcher(None, norm_query, k).ratio()
                )
                result = by_title_norm.get(best_key) or by_ticker.get(best_key)
                if result:
                    self._search_cache[norm_query] = result
                    return result
            
            # ✅ OPTIMIZATION 7: Fuzzy matching (difflib, for near-miss spellings)
            close = difflib.get_close_matches(norm_query, all_keys, n=1, cutoff=0.78)
            if close:
                best = close[0]
                result = by_title_norm.get(best) or by_ticker.get(best)
                if result:
                    self._search_cache[norm_query] = result
                    return result
            
            # Not found
            return None
            
        except TimeoutError as e:
            print(f"Timeout searching company: {e}")
            return None
        except Exception as e:
            print(f"Error searching company: {e}")
            return None
    
    @lru_cache(maxsize=128)
    def get_company_info(self, cik):
        """
        Get basic company information (cached)
        
        Args:
            cik (str): Company CIK code
            
        Returns:
            dict: Dictionary containing company information
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return None
            
        try:
            self._rate_limit()
            # Get company submissions (now has timeout protection)
            submissions = self.edgar.get_submissions(cik=cik)
            
            return {
                "cik": cik,
                "name": submissions.get("name", ""),
                "tickers": submissions.get("tickers", []),
                "sic": submissions.get("sic", ""),
                "sic_description": submissions.get("sicDescription", "")
            }
        except TimeoutError as e:
            print(f"Timeout getting company info for CIK {cik}: {e}")
            return None
        except Exception as e:
            print(f"Error getting company info: {e}")
            return None
    
    @lru_cache(maxsize=128)
    def get_company_filings(self, cik, form_types=None):
        """
        Get all company filing documents (cached)
        
        Args:
            cik (str): Company CIK code
            form_types (tuple): Tuple of form types, e.g., ('10-K', '10-Q'), None for all types
            
        Returns:
            list: List of filing documents
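        
        Example (illustrative):
            >>> filings = client.get_company_filings("0000320193", form_types=("10-K",))
            >>> filings[0]["accession_number"]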
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return []
        
        # Convert list to tuple for caching (lists are not hashable)
        if form_types and isinstance(form_types, list):
            form_types = tuple(form_types)
            
        try:
            self._rate_limit()
            # Get company submissions (now has timeout protection)
            submissions = self.edgar.get_submissions(cik=cik)
            
            # Extract filing information
            filings = []
            recent = submissions.get("filings", {}).get("recent", {})
            
            # Get data from each field
            form_types_list = recent.get("form", [])
            filing_dates = recent.get("filingDate", [])
            accession_numbers = recent.get("accessionNumber", [])
            primary_documents = recent.get("primaryDocument", [])
            
            # Iterate through all filings
            for i in range(len(form_types_list)):
                form_type = form_types_list[i]
                
                # ✅ Normalize the form type: "10-K/A" -> "10-K", "20-F/A" -> "20-F"
                # so amended annual reports are still recognized and used
                normalized_form_type = form_type.split('/')[0]
                
                # Filter by form type if specified (using the normalized type)
                if form_types and normalized_form_type not in form_types:
                    continue
                
                filing_date = filing_dates[i] if i < len(filing_dates) else ""
                accession_number = accession_numbers[i] if i < len(accession_numbers) else ""
                primary_document = primary_documents[i] if i < len(primary_documents) else ""
                
                filing = {
                    "form_type": form_type,  # 保留原始form_type供参考
                    "filing_date": filing_date,
                    "accession_number": accession_number,
                    "primary_document": primary_document
                }
                
                filings.append(filing)
            
            return filings
        except TimeoutError as e:
            print(f"Timeout getting company filings for CIK {cik}: {e}")
            return []
        except Exception as e:
            print(f"Error getting company filings: {e}")
            return []
    
    @lru_cache(maxsize=128)
    def get_company_facts(self, cik):
        """
        Get all company financial facts data (cached)
        
        Args:
            cik (str): Company CIK code
            
        Returns:
            dict: Company financial facts data
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
            
        try:
            self._rate_limit()
            # Now has timeout protection via monkey patch
            facts = self.edgar.get_company_facts(cik=cik)
            return facts
        except TimeoutError as e:
            print(f"Timeout getting company facts for CIK {cik}: {e}")
            return {}
        except Exception as e:
            print(f"Error getting company facts: {e}")
            return {}
    
    def get_financial_data_for_period(self, cik, period):
        """
        Get financial data for a specific period (supports annual and quarterly) - Cached
        
        Args:
            cik (str): Company CIK code
            period (str): Period in format 'YYYY' or 'YYYYQX' (e.g., '2025' or '2025Q3')
            
        Returns:
            dict: Financial data dictionary
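        
        Example (illustrative):
            >>> data = client.get_financial_data_for_period("0000320193", "2023")
            >>> data["metrics"]["total_revenue"]["value"]  # raw USD figure from the matched filing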
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
        
        # Instance-level cache (avoids recomputing the same period)
        if not hasattr(self, '_period_cache'):
            self._period_cache = {}
        
        cache_key = f"period_{cik}_{period}"
        if cache_key in self._period_cache:
            return self._period_cache[cache_key]
            
        try:
            # Get company financial facts
            facts = self.get_company_facts(cik)
            
            if not facts:
                return {}
            
            # Extract us-gaap and ifrs-full financial data (20-F may use IFRS)
            us_gaap = facts.get("facts", {}).get("us-gaap", {})
            ifrs_full = facts.get("facts", {}).get("ifrs-full", {})
            
            # Define financial metrics and their XBRL tags
            # Include multiple possible tags to improve match rate (including US-GAAP and IFRS tags)
            financial_metrics = {
                "total_revenue": ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax", "RevenueFromContractWithCustomerIncludingAssessedTax", "SalesRevenueNet", "RevenueFromContractWithCustomer", "Revenue"],
                "net_income": ["NetIncomeLoss", "ProfitLoss", "NetIncome", "ProfitLossAttributableToOwnersOfParent"],
                "earnings_per_share": ["EarningsPerShareBasic", "EarningsPerShare", "BasicEarningsPerShare", "BasicEarningsLossPerShare"],
                "operating_expenses": ["OperatingExpenses", "OperatingCostsAndExpenses", "OperatingExpensesExcludingDepreciationAndAmortization", "CostsAndExpenses", "GeneralAndAdministrativeExpense", "CostOfRevenue", "ResearchAndDevelopmentExpense", "SellingAndMarketingExpense"],
                "operating_cash_flow": ["NetCashProvidedByUsedInOperatingActivities", "NetCashProvidedUsedInOperatingActivities", "NetCashFlowsFromUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"],
            }
            
            # Store result with new optimized structure
            result = {
                "period": period,
                "_metadata": {},
                "metrics": {}
            }
            
            # Determine target form types to search
            if 'Q' in period:
                # Quarterly data, mainly search 10-Q (20-F usually doesn't have quarterly reports)
                target_forms = ("10-Q",)  # Use tuple for caching
                target_forms_annual = ("10-K", "20-F")  # for fallback
                year = int(period.split('Q')[0])
                quarter = period.split('Q')[1]
            else:
                # Annual data, search 10-K and 20-F annual forms
                target_forms = ("10-K", "20-F")  # Use tuple for caching
                target_forms_annual = target_forms
                year = int(period)
                quarter = None
            
            # Get company filings to find accession number and primary document
            filings = self.get_company_filings(cik, form_types=target_forms)
            filings_map = {}  # Map: form -> {accession_number, primary_document, filing_date}
            
            # Build filing map for quick lookup
            for filing in filings:
                form_type = filing.get("form_type", "")
                filing_date = filing.get("filing_date", "")
                accession_number = filing.get("accession_number", "")
                primary_document = filing.get("primary_document", "")
                
                if filing_date and accession_number:
                    # Extract year from filing_date (format: YYYY-MM-DD)
                    file_year = int(filing_date[:4]) if len(filing_date) >= 4 else 0
                    
                    # ✅ Normalize the form type: "10-K/A" -> "10-K", "20-F/A" -> "20-F"
                    # Key on the normalized type so a "10-K" entry in facts can match a "10-K/A" filing
                    normalized_form_type = form_type.split('/')[0]
                    
                    # ✅ FIXED: Remove year filter to keep all filings
                    # 20-F forms are often filed in the year after the fiscal year
                    # We'll match them later using fiscal year (fy) and filed date
                    key = f"{normalized_form_type}_{file_year}"  # 使用归一化后的类型
                    if key not in filings_map:
                        filings_map[key] = {
                            "accession_number": accession_number,
                            "primary_document": primary_document,
                            "form_type": form_type,  # 保留原始 form_type
                            "filing_date": filing_date
                        }
            
            # Iterate through each financial metric
            for metric_key, metric_tags in financial_metrics.items():
                # Support multiple possible tags
                for metric_tag in metric_tags:
                    # Search both US-GAAP and IFRS tags
                    metric_data = None
                    data_source = None
                    
                    if metric_tag in us_gaap:
                        metric_data = us_gaap[metric_tag]
                        data_source = "us-gaap"
                    elif metric_tag in ifrs_full:
                        metric_data = ifrs_full[metric_tag]
                        data_source = "ifrs-full"
                    
                    if metric_data:
                        units = metric_data.get("units", {})
                        
                        # Find USD unit data (supports USD and USD/shares)
                        usd_data = None
                        if "USD" in units:
                            usd_data = units["USD"]
                        elif "USD/shares" in units and metric_key == "earnings_per_share":
                            # EPS uses USD/shares unit
                            usd_data = units["USD/shares"]
                        
                        if usd_data:
                            # Try exact match first, then loose match
                            matched_entry = None
                            
                            # Search for data in the specified period
                            for entry in usd_data:
                                form = entry.get("form", "")
                                fy = entry.get("fy", 0)
                                fp = entry.get("fp", "")
                                end_date = entry.get("end", "")
                                
                                if not end_date or len(end_date) < 4:
                                    continue
                                    
                                entry_year = int(end_date[:4])
                                
                                # Check if form type matches
                                if form in target_forms:
                                    if quarter:
                                        # Quarterly data match
                                        if entry_year == year and fp == f"Q{quarter}":
                                            # If already matched, compare end date, choose the latest
                                            if matched_entry:
                                                if entry.get("end", "") > matched_entry.get("end", ""):
                                                    matched_entry = entry
                                            else:
                                                matched_entry = entry
                                    else:
                                        # Annual data match - prioritize fiscal year (fy) field
                                        # Strategy 1: Exact match by fiscal year
                                        if fy == year and (fp == "FY" or fp == "" or not fp):
                                            # If already matched, compare end date, choose the latest
                                            if matched_entry:
                                                if entry.get("end", "") > matched_entry.get("end", ""):
                                                    matched_entry = entry
                                            else:
                                                matched_entry = entry
                                        # Strategy 2: Match by end date year (when fy not available or doesn't match)
                                        elif not matched_entry and entry_year == year and (fp == "FY" or fp == "" or not fp):
                                            matched_entry = entry
                                        # Strategy 3: Allow fy to differ by 1 year (fiscal year vs calendar year mismatch)
                                        elif not matched_entry and fy > 0 and abs(fy - year) <= 1 and (fp == "FY" or fp == "" or not fp):
                                            matched_entry = entry
                                        # Strategy 4: Match by frame field for 20-F
                                        elif not matched_entry and form == "20-F" and "frame" in entry:
                                            frame = entry.get("frame", "")
                                            if f"CY{year}" in frame or str(year) in end_date:
                                                matched_entry = entry
                            
                            # If no quarterly data was found, fall back to the annual filings
                            if not matched_entry and quarter and target_forms_annual:
                                for entry in usd_data:
                                    form = entry.get("form", "")
                                    end_date = entry.get("end", "")
                                    fp = entry.get("fp", "")
                                    
                                    if form in target_forms_annual and end_date:
                                        # Check if end date is within this quarter range
                                        if str(year) in end_date and f"Q{quarter}" in fp:
                                            matched_entry = entry
                                            break
                            
                            # Apply matched data
                            if matched_entry:
                                # Store metric value and tag
                                result["metrics"][metric_key] = {
                                    "value": matched_entry.get("val", 0),
                                    "tag": metric_tag
                                }
                                
                                # Get form and accession info - only populate metadata once
                                if not result["_metadata"]:
                                    form_type = matched_entry.get("form", "")
                                    accn_from_facts = matched_entry.get('accn', '').replace('-', '')
                                    filed_date = matched_entry.get('filed', '')
                                    
                                    # Multi-strategy filing lookup for 20-F and cross-year submissions
                                    filing_info = None
                                    
                                    # Strategy 1: Try matching by fiscal year
                                    filing_key = f"{form_type}_{year}"
                                    filing_info = filings_map.get(filing_key)
                                    
                                    # Strategy 2: Try matching by filed year (for 20-F filed in next year)
                                    if not filing_info and filed_date:
                                        filed_year = int(filed_date[:4]) if len(filed_date) >= 4 else 0
                                        if filed_year > 0:
                                            filing_key = f"{form_type}_{filed_year}"
                                            filing_info = filings_map.get(filing_key)
                                    
                                    # Strategy 3: Try fiscal year + 1 (common for 20-F)
                                    if not filing_info:
                                        filing_key = f"{form_type}_{year + 1}"
                                        filing_info = filings_map.get(filing_key)
                                    
                                    # Strategy 4: Search all filings with matching form type and accession
                                    if not filing_info and accn_from_facts:
                                        for key, finfo in filings_map.items():
                                            if finfo["form_type"] == form_type:
                                                filing_accn = finfo["accession_number"].replace('-', '')
                                                if filing_accn == accn_from_facts:
                                                    filing_info = finfo
                                                    break
                                    
                                    # Generate source URL
                                    source_url = ""
                                    if filing_info:
                                        accession_number = filing_info["accession_number"].replace('-', '')
                                        primary_document = filing_info["primary_document"]
                                        if primary_document:
                                            source_url = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}/{primary_document}"
                                        else:
                                            source_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"
                                    else:
                                        source_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"
                                    
                                    # Populate metadata (only once per period)
                                    result["_metadata"] = {
                                        "form": matched_entry.get("form", ""),
                                        "fiscal_year": matched_entry.get("fy", 0),
                                        "fiscal_period": matched_entry.get("fp", ""),
                                        "start_date": matched_entry.get("start", ""),
                                        "end_date": matched_entry.get("end", ""),
                                        "filed_date": matched_entry.get("filed", ""),
                                        "source_url": source_url,
                                        "data_source": data_source
                                    }
                        
                        # If data is found, break out of tag loop
                        if metric_key in result["metrics"]:
                            break
            
            # Cache the result
            if result and "period" in result:
                self._period_cache[cache_key] = result
            
            return result
        except Exception as e:
            print(f"Error getting financial data for period {period}: {e}")
            return {}
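

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only; performs live SEC requests).
    # "AAPL" and the 2023 period are example inputs, not requirements.
    client = EdgarDataClient()
    match = client.search_company_by_name("AAPL")
    print(match)  # e.g. {"cik": "0000320193", "name": "Apple Inc.", "ticker": "AAPL"}
    if match:
        annual = client.get_financial_data_for_period(match["cik"], "2023")
        print(annual.get("_metadata", {}).get("source_url", ""))
        for name, info in annual.get("metrics", {}).items():
            print(f"{name}: {info['value']} (tag: {info['tag']})")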