ginipick committed on
Commit
48b33a3
·
verified ·
1 Parent(s): 4a10154

Update app.py

Files changed (1)
  1. app.py +125 -126
app.py CHANGED
@@ -35,6 +35,7 @@ import sqlite3
 import time
 from huggingface_hub import HfApi
 from bs4 import BeautifulSoup
+import re
 
 # Initialize the Flask app
 app = Flask(__name__)
@@ -1282,6 +1283,129 @@ class AdvancedAIAnalyzer:
         }
         self.news_data = []
 
+    def fetch_aitimes_news(self) -> List[Dict]:
+        """Crawl today's news from AI Times"""
+        print("📰 Collecting AI Times news...")
+
+        # URLs to collect
+        urls = [
+            'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
+            'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm'
+        ]
+
+        all_news = []
+        today = datetime.now().strftime('%m-%d')  # e.g. '10-10'
+
+        for url_idx, url in enumerate(urls, 1):
+            try:
+                print(f"  🔍 [{url_idx}/2] Fetching: {url}")
+                response = requests.get(url, timeout=15, headers={
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+                })
+                response.raise_for_status()
+                response.encoding = 'utf-8'
+
+                text = response.text
+
+                # Pattern: [title](link)...date
+                # e.g. [MS "Surging ChatGPT demand causing data-center shortage... likely through 2026"](https://www.aitimes.com/news/articleView.html?idxno=203055)
+                # ...
+                # followed by the section/byline and the date, e.g. "Industry | Reporter 10-10 15:10"
+
+                # Pattern matching the title and link
+                pattern = r'\[([^\]]+)\]\((https://www\.aitimes\.com/news/articleView\.html\?idxno=\d+)\)'
+
+                matches = re.finditer(pattern, text)
+
+                articles_found = 0
+                for match in matches:
+                    try:
+                        title = match.group(1).strip()
+                        link = match.group(2).strip()
+
+                        # Skip titles that are too short
+                        if len(title) < 10:
+                            continue
+
+                        # Find this article's date (within 200 chars after the link)
+                        pos = match.end()
+                        nearby_text = text[pos:pos+200]
+
+                        # Date pattern: '10-10 15:10' format
+                        date_pattern = r'(\d{2}-\d{2}\s+\d{2}:\d{2})'
+                        date_match = re.search(date_pattern, nearby_text)
+
+                        date_text = date_match.group(1) if date_match else today
+
+                        # Keep only today's articles
+                        if today not in date_text:
+                            continue
+
+                        news_item = {
+                            'title': title,
+                            'url': link,
+                            'date': date_text,
+                            'source': 'AI Times',
+                            'category': 'AI'
+                        }
+
+                        all_news.append(news_item)
+                        articles_found += 1
+
+                        print(f"    ✓ Added: {title[:60]}... ({date_text})")
+
+                    except Exception as e:
+                        continue
+
+                print(f"  → Found {articles_found} articles dated today\n")
+                time.sleep(1)  # Avoid overloading the server
+
+            except Exception as e:
+                print(f"  ⚠️ Error fetching URL: {e}\n")
+                continue
+
+        # Deduplicate by URL
+        unique_news = []
+        seen_urls = set()
+        for news in all_news:
+            if news['url'] not in seen_urls:
+                unique_news.append(news)
+                seen_urls.add(news['url'])
+
+        print(f"✅ {len(unique_news)} deduplicated articles dated today\n")
+
+        # Guarantee at least 3 items (add samples if there are fewer)
+        if len(unique_news) < 3:
+            print("⚠️ Not enough news; adding recent sample items\n")
+            sample_news = [
+                {
+                    'title': 'MS: "Surging ChatGPT demand causing data-center shortage... likely to last through 2026"',
+                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055',
+                    'date': '10-10 15:10',
+                    'source': 'AI Times',
+                    'category': 'AI'
+                },
+                {
+                    'title': 'US approves some GPU sales to the UAE... Nvidia nears $5 trillion market cap',
+                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053',
+                    'date': '10-10 14:46',
+                    'source': 'AI Times',
+                    'category': 'AI'
+                },
+                {
+                    'title': 'Sora tops 1 million downloads faster than ChatGPT',
+                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045',
+                    'date': '10-10 12:55',
+                    'source': 'AI Times',
+                    'category': 'AI'
+                }
+            ]
+            for sample in sample_news:
+                if sample['url'] not in seen_urls:
+                    unique_news.append(sample)
+
+        return unique_news[:20]  # At most 20 items
+
     def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]:
         """Collect 30 trending Hugging Face models (via the real API)"""
         print(f"🤗 Collecting {limit} trending Hugging Face models...")
@@ -1416,131 +1540,6 @@ class AdvancedAIAnalyzer:
             print("💾 Trying to load previous data from the DB...")
             return load_spaces_from_db()
 
-    def fetch_aitimes_news(self) -> List[Dict]:
-        """Crawl today's news from AI Times"""
-        import re
-
-        print("📰 Collecting AI Times news...")
-
-        # URLs to collect
-        urls = [
-            'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
-            'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm'
-        ]
-
-        all_news = []
-        today = datetime.now().strftime('%m-%d')  # e.g. '10-10'
-
-        for url_idx, url in enumerate(urls, 1):
-            try:
-                print(f"  🔍 [{url_idx}/2] Fetching: {url}")
-                response = requests.get(url, timeout=15, headers={
-                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-                })
-                response.raise_for_status()
-                response.encoding = 'utf-8'
-
-                text = response.text
-
-                # Pattern: [title](link)...date
-                # e.g. [MS "Surging ChatGPT demand causing data-center shortage... likely through 2026"](https://www.aitimes.com/news/articleView.html?idxno=203055)
-                # ...
-                # followed by the section/byline and the date, e.g. "Industry | Reporter 10-10 15:10"
-
-                # Pattern matching the title and link
-                pattern = r'\[([^\]]+)\]\((https://www\.aitimes\.com/news/articleView\.html\?idxno=\d+)\)'
-
-                matches = re.finditer(pattern, text)
-
-                articles_found = 0
-                for match in matches:
-                    try:
-                        title = match.group(1).strip()
-                        link = match.group(2).strip()
-
-                        # Skip titles that are too short
-                        if len(title) < 10:
-                            continue
-
-                        # Find this article's date (within 200 chars after the link)
-                        pos = match.end()
-                        nearby_text = text[pos:pos+200]
-
-                        # Date pattern: '10-10 15:10' format
-                        date_pattern = r'(\d{2}-\d{2}\s+\d{2}:\d{2})'
-                        date_match = re.search(date_pattern, nearby_text)
-
-                        date_text = date_match.group(1) if date_match else today
-
-                        # Keep only today's articles
-                        if today not in date_text:
-                            continue
-
-                        news_item = {
-                            'title': title,
-                            'url': link,
-                            'date': date_text,
-                            'source': 'AI Times',
-                            'category': 'AI'
-                        }
-
-                        all_news.append(news_item)
-                        articles_found += 1
-
-                        print(f"    ✓ Added: {title[:60]}... ({date_text})")
-
-                    except Exception as e:
-                        continue
-
-                print(f"  → Found {articles_found} articles dated today\n")
-                time.sleep(1)  # Avoid overloading the server
-
-            except Exception as e:
-                print(f"  ⚠️ Error fetching URL: {e}\n")
-                continue
-
-        # Deduplicate by URL
-        unique_news = []
-        seen_urls = set()
-        for news in all_news:
-            if news['url'] not in seen_urls:
-                unique_news.append(news)
-                seen_urls.add(news['url'])
-
-        print(f"✅ {len(unique_news)} deduplicated articles dated today\n")
-
-        # Guarantee at least 3 items (add samples if there are fewer)
-        if len(unique_news) < 3:
-            print("⚠️ Not enough news; adding recent sample items\n")
-            sample_news = [
-                {
-                    'title': 'MS: "Surging ChatGPT demand causing data-center shortage... likely to last through 2026"',
-                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055',
-                    'date': '10-10 15:10',
-                    'source': 'AI Times',
-                    'category': 'AI'
-                },
-                {
-                    'title': 'US approves some GPU sales to the UAE... Nvidia nears $5 trillion market cap',
-                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053',
-                    'date': '10-10 14:46',
-                    'source': 'AI Times',
-                    'category': 'AI'
-                },
-                {
-                    'title': 'Sora tops 1 million downloads faster than ChatGPT',
-                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045',
-                    'date': '10-10 12:55',
-                    'source': 'AI Times',
-                    'category': 'AI'
-                }
-            ]
-            for sample in sample_news:
-                if sample['url'] not in seen_urls:
-                    unique_news.append(sample)
-
-        return unique_news[:20]  # At most 20 items
-
     def analyze_all_news(self) -> List[Dict]:
         """Add LLM analysis to every news item"""
         print("📰 Starting LLM analysis of the news...")
@@ -1804,5 +1803,5 @@ if __name__ == '__main__':
         print("\n\n👋 Server shutting down!")
         sys.exit(0)
     except Exception as e:
-        print(f"\n❌ Server error: {e}")
+        print(f"\n❌Server error: {e}")
         sys.exit(1)
 
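For context on the change above: the new fetch_aitimes_news() relies on two regular expressions, one for markdown-style [title](link) pairs and one for a '10-10 15:10'-style timestamp searched in a short window after each link. Below is a minimal, runnable sketch of that matching step; the listing snippet, headline, and byline in it are invented placeholders, not real AI Times content.

import re
from datetime import datetime

# Invented sample of what a rendered listing page might contain.
sample = (
    '[Example headline about an AI model release this week]'
    '(https://www.aitimes.com/news/articleView.html?idxno=203055)'
    ' SectionName Reporter 10-10 15:10'
)

link_pattern = r'\[([^\]]+)\]\((https://www\.aitimes\.com/news/articleView\.html\?idxno=\d+)\)'
date_pattern = r'(\d{2}-\d{2}\s+\d{2}:\d{2})'
today = datetime.now().strftime('%m-%d')  # e.g. '10-10'

for match in re.finditer(link_pattern, sample):
    title, link = match.group(1).strip(), match.group(2).strip()
    # Look for the date only in the 200 characters after the link,
    # mirroring the window used in fetch_aitimes_news().
    window = sample[match.end():match.end() + 200]
    date_match = re.search(date_pattern, window)
    date_text = date_match.group(1) if date_match else today
    print(title, link, date_text)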