ginipick committed
Commit 298104b · verified · 1 Parent(s): 98c792d

Update app.py

Files changed (1): app.py +7 -18
app.py CHANGED
@@ -1287,14 +1287,13 @@ class AdvancedAIAnalyzer:
         """Crawl today's news from AI Times"""
         print("📰 Collecting AI Times news...")
 
-        # List of URLs to collect
         urls = [
             'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
             'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm'
         ]
 
         all_news = []
-        today = datetime.now().strftime('%m-%d')  # e.g. '10-10'
+        today = datetime.now().strftime('%m-%d')
 
         for url_idx, url in enumerate(urls, 1):
             try:
@@ -1307,14 +1306,8 @@ class AdvancedAIAnalyzer:
 
                 text = response.text
 
-                # Pattern: [title](link)...date
-                # e.g. [MS "Data center shortage from surging 'ChatGPT' demand... likely to last until 2026"](https://www.aitimes.com/news/articleView.html?idxno=203055)
-                # ...
-                # General Industry reporter Park Chan 10-10 15:10
-
-                # Title and link matching pattern
+                # Extract [title](link) pairs with a regular expression
                 pattern = r'\[([^\]]+)\]\((https://www\.aitimes\.com/news/articleView\.html\?idxno=\d+)\)'
-
                 matches = re.finditer(pattern, text)
 
                 articles_found = 0
@@ -1323,15 +1316,12 @@ class AdvancedAIAnalyzer:
                     title = match.group(1).strip()
                     link = match.group(2).strip()
 
-                    # Skip titles that are too short
                     if len(title) < 10:
                         continue
 
-                    # Find this article's date (within 100 chars after the link)
+                    # Find the date (within 200 chars after the link)
                     pos = match.end()
                     nearby_text = text[pos:pos+200]
-
-                    # Date pattern: '10-10 15:10' format
                     date_pattern = r'(\d{2}-\d{2}\s+\d{2}:\d{2})'
                     date_match = re.search(date_pattern, nearby_text)
 
@@ -1351,20 +1341,19 @@ class AdvancedAIAnalyzer:
 
                     all_news.append(news_item)
                     articles_found += 1
-
                     print(f" ✓ Added: {title[:60]}... ({date_text})")
 
                 except Exception as e:
                     continue
 
                 print(f" → Found {articles_found} articles from today\n")
-                time.sleep(1)  # Avoid overloading the server
+                time.sleep(1)
 
             except Exception as e:
                 print(f" ⚠️ URL collection error: {e}\n")
                 continue
 
-        # Remove duplicates (by URL)
+        # Remove duplicates
         unique_news = []
         seen_urls = set()
         for news in all_news:
@@ -1374,7 +1363,7 @@ class AdvancedAIAnalyzer:
 
         print(f"✅ {len(unique_news)} unique articles from today in total\n")
 
-        # Guarantee at least 3 (add samples if none)
+        # Guarantee at least 3
         if len(unique_news) < 3:
             print("⚠️ Not enough news; adding recent samples\n")
             sample_news = [
@@ -1404,7 +1393,7 @@ class AdvancedAIAnalyzer:
             if sample['url'] not in seen_urls:
                 unique_news.append(sample)
 
-        return unique_news[:20]  # At most 20
+        return unique_news[:20]
 
     def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]:
         """Collect 30 trending Hugging Face models (real API)"""
 