ginipick committed
Commit 298104b · verified · 1 Parent(s): 98c792d

Update app.py

Files changed (1): app.py +7 -18
app.py CHANGED
@@ -1287,14 +1287,13 @@ class AdvancedAIAnalyzer:
         """Crawl today's news from AI Times"""
         print("📰 Collecting AI Times news...")
 
-        # List of URLs to collect
         urls = [
             'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
             'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm'
         ]
 
         all_news = []
-        today = datetime.now().strftime('%m-%d')  # e.g. '10-10'
+        today = datetime.now().strftime('%m-%d')
 
         for url_idx, url in enumerate(urls, 1):
             try:
@@ -1307,14 +1306,8 @@ class AdvancedAIAnalyzer:
 
                 text = response.text
 
-                # Pattern: [title](link)...date
-                # e.g. [MS "Data center shortage from surging 'ChatGPT' demand... likely to last until 2026"](https://www.aitimes.com/news/articleView.html?idxno=203055)
-                # ...
-                # General Industry reporter Park Chan 10-10 15:10
-
-                # Title and link matching pattern
+                # Extract [title](link) pairs with a regular expression
                 pattern = r'\[([^\]]+)\]\((https://www\.aitimes\.com/news/articleView\.html\?idxno=\d+)\)'
-
                 matches = re.finditer(pattern, text)
 
                 articles_found = 0
@@ -1323,15 +1316,12 @@ class AdvancedAIAnalyzer:
                     title = match.group(1).strip()
                     link = match.group(2).strip()
 
-                    # Skip titles that are too short
                     if len(title) < 10:
                         continue
 
-                    # Find this article's date (within 100 chars after the link)
+                    # Find the date (within 200 chars after the link)
                     pos = match.end()
                     nearby_text = text[pos:pos+200]
-
-                    # Date pattern: '10-10 15:10' format
                     date_pattern = r'(\d{2}-\d{2}\s+\d{2}:\d{2})'
                     date_match = re.search(date_pattern, nearby_text)
 
@@ -1351,20 +1341,19 @@ class AdvancedAIAnalyzer:
 
                     all_news.append(news_item)
                     articles_found += 1
-
                     print(f" ✓ Added: {title[:60]}... ({date_text})")
 
                 except Exception as e:
                     continue
 
                 print(f" → Found {articles_found} articles from today\n")
-                time.sleep(1)  # Avoid overloading the server
+                time.sleep(1)
 
             except Exception as e:
                 print(f" ⚠️ URL collection error: {e}\n")
                 continue
 
-        # Remove duplicates (by URL)
+        # Remove duplicates
         unique_news = []
         seen_urls = set()
         for news in all_news:
@@ -1374,7 +1363,7 @@ class AdvancedAIAnalyzer:
 
         print(f"✅ {len(unique_news)} unique articles from today in total\n")
 
-        # Guarantee at least 3 (add samples if none)
+        # Guarantee at least 3
         if len(unique_news) < 3:
             print("⚠️ Not enough news; adding recent samples\n")
             sample_news = [
@@ -1404,7 +1393,7 @@ class AdvancedAIAnalyzer:
             if sample['url'] not in seen_urls:
                 unique_news.append(sample)
 
-        return unique_news[:20]  # At most 20
+        return unique_news[:20]
 
     def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]:
         """Collect 30 trending Hugging Face models (real API)"""
 