Update app.py
Browse files
app.py
CHANGED
|
@@ -35,6 +35,7 @@ import sqlite3
|
|
| 35 |
import time
|
| 36 |
from huggingface_hub import HfApi
|
| 37 |
from bs4 import BeautifulSoup
|
|
|
|
| 38 |
|
| 39 |
# Flask 앱 초기화
|
| 40 |
app = Flask(__name__)
|
|
@@ -1282,6 +1283,129 @@ class AdvancedAIAnalyzer:
|
|
| 1282 |
}
|
| 1283 |
self.news_data = []
|
| 1284 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1285 |
def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]:
|
| 1286 |
"""ํ๊น
ํ์ด์ค ํธ๋ ๋ฉ ๋ชจ๋ธ 30๊ฐ ์์ง (์ค์ API)"""
|
| 1287 |
print(f"๐ค ํ๊น
ํ์ด์ค ํธ๋ ๋ฉ ๋ชจ๋ธ {limit}๊ฐ ์์ง ์ค...")
|
|
@@ -1416,131 +1540,6 @@ class AdvancedAIAnalyzer:
|
|
| 1416 |
print("๐พ DB์์ ์ด์ ๋ฐ์ดํฐ ๋ก๋ ์๋...")
|
| 1417 |
return load_spaces_from_db()
|
| 1418 |
|
| 1419 |
-
def fetch_aitimes_news(self) -> List[Dict]:
|
| 1420 |
-
"""AI Times์์ ์ค๋ ๋ ์ง ๋ด์ค ํฌ๋กค๋ง"""
|
| 1421 |
-
import re
|
| 1422 |
-
|
| 1423 |
-
print("๐ฐ AI Times ๋ด์ค ์์ง ์ค...")
|
| 1424 |
-
|
| 1425 |
-
# 수집할 URL 목록
|
| 1426 |
-
urls = [
|
| 1427 |
-
'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
|
| 1428 |
-
'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm'
|
| 1429 |
-
]
|
| 1430 |
-
|
| 1431 |
-
all_news = []
|
| 1432 |
-
today = datetime.now().strftime('%m-%d') # ์: '10-10'
|
| 1433 |
-
|
| 1434 |
-
for url_idx, url in enumerate(urls, 1):
|
| 1435 |
-
try:
|
| 1436 |
-
print(f" ๐ [{url_idx}/2] ์์ง ์ค: {url}")
|
| 1437 |
-
response = requests.get(url, timeout=15, headers={
|
| 1438 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
| 1439 |
-
})
|
| 1440 |
-
response.raise_for_status()
|
| 1441 |
-
response.encoding = 'utf-8'
|
| 1442 |
-
|
| 1443 |
-
text = response.text
|
| 1444 |
-
|
| 1445 |
-
# 패턴: [제목](링크)...날짜
|
| 1446 |
-
# ์: [MS "๊ธ์ฆํ๋ '์ฑGPT' ์์๋ก ๋ฐ์ดํฐ์ผํฐ ๋ถ์กฑ...2026๋
๊น์ง ์ง์๋ ๋ฏ"](https://www.aitimes.com/news/articleView.html?idxno=203055)
|
| 1447 |
-
# ...
|
| 1448 |
-
# 산업일반박찬 기자10-10 15:10
|
| 1449 |
-
|
| 1450 |
-
# 제목과 링크 매칭 패턴
|
| 1451 |
-
pattern = r'\[([^\]]+)\]\((https://www\.aitimes\.com/news/articleView\.html\?idxno=\d+)\)'
|
| 1452 |
-
|
| 1453 |
-
matches = re.finditer(pattern, text)
|
| 1454 |
-
|
| 1455 |
-
articles_found = 0
|
| 1456 |
-
for match in matches:
|
| 1457 |
-
try:
|
| 1458 |
-
title = match.group(1).strip()
|
| 1459 |
-
link = match.group(2).strip()
|
| 1460 |
-
|
| 1461 |
-
# 제목이 너무 짧으면 스킵
|
| 1462 |
-
if len(title) < 10:
|
| 1463 |
-
continue
|
| 1464 |
-
|
| 1465 |
-
# 해당 기사의 날짜 찾기 (링크 다음에 100자 이내)
|
| 1466 |
-
pos = match.end()
|
| 1467 |
-
nearby_text = text[pos:pos+200]
|
| 1468 |
-
|
| 1469 |
-
# 날짜 패턴: 10-10 15:10 형식
|
| 1470 |
-
date_pattern = r'(\d{2}-\d{2}\s+\d{2}:\d{2})'
|
| 1471 |
-
date_match = re.search(date_pattern, nearby_text)
|
| 1472 |
-
|
| 1473 |
-
date_text = date_match.group(1) if date_match else today
|
| 1474 |
-
|
| 1475 |
-
# 오늘 날짜만 필터링
|
| 1476 |
-
if today not in date_text:
|
| 1477 |
-
continue
|
| 1478 |
-
|
| 1479 |
-
news_item = {
|
| 1480 |
-
'title': title,
|
| 1481 |
-
'url': link,
|
| 1482 |
-
'date': date_text,
|
| 1483 |
-
'source': 'AI Times',
|
| 1484 |
-
'category': 'AI'
|
| 1485 |
-
}
|
| 1486 |
-
|
| 1487 |
-
all_news.append(news_item)
|
| 1488 |
-
articles_found += 1
|
| 1489 |
-
|
| 1490 |
-
print(f" โ ์ถ๊ฐ: {title[:60]}... ({date_text})")
|
| 1491 |
-
|
| 1492 |
-
except Exception as e:
|
| 1493 |
-
continue
|
| 1494 |
-
|
| 1495 |
-
print(f" โ {articles_found}๊ฐ ์ค๋์ ๊ธฐ์ฌ ๋ฐ๊ฒฌ\n")
|
| 1496 |
-
time.sleep(1) # ์๋ฒ ๋ถํ ๋ฐฉ์ง
|
| 1497 |
-
|
| 1498 |
-
except Exception as e:
|
| 1499 |
-
print(f" โ ๏ธ URL ์์ง ์ค๋ฅ: {e}\n")
|
| 1500 |
-
continue
|
| 1501 |
-
|
| 1502 |
-
# 중복 제거 (URL 기준)
|
| 1503 |
-
unique_news = []
|
| 1504 |
-
seen_urls = set()
|
| 1505 |
-
for news in all_news:
|
| 1506 |
-
if news['url'] not in seen_urls:
|
| 1507 |
-
unique_news.append(news)
|
| 1508 |
-
seen_urls.add(news['url'])
|
| 1509 |
-
|
| 1510 |
-
print(f"โ
์ด {len(unique_news)}๊ฐ ์ค๋ณต ์ ๊ฑฐ๋ ์ค๋์ ๋ด์ค\n")
|
| 1511 |
-
|
| 1512 |
-
# 최소 3개는 보장 (없으면 샘플 추가)
|
| 1513 |
-
if len(unique_news) < 3:
|
| 1514 |
-
print("โ ๏ธ ๋ด์ค๊ฐ ๋ถ์กฑํ์ฌ ์ต๊ทผ ์ํ ์ถ๊ฐ\n")
|
| 1515 |
-
sample_news = [
|
| 1516 |
-
{
|
| 1517 |
-
'title': 'MS "์ฑGPT ์์ ํญ์ฆ์ผ๋ก ๋ฐ์ดํฐ์ผํฐ ๋ถ์กฑ...2026๋
๊น์ง ์ง์"',
|
| 1518 |
-
'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055',
|
| 1519 |
-
'date': '10-10 15:10',
|
| 1520 |
-
'source': 'AI Times',
|
| 1521 |
-
'category': 'AI'
|
| 1522 |
-
},
|
| 1523 |
-
{
|
| 1524 |
-
'title': '๋ฏธ๊ตญ, UAE์ GPU ํ๋งค ์ผ๋ถ ์น์ธ...์๋น๋์ ์์ด 5์กฐ๋ฌ๋ฌ ๋์',
|
| 1525 |
-
'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053',
|
| 1526 |
-
'date': '10-10 14:46',
|
| 1527 |
-
'source': 'AI Times',
|
| 1528 |
-
'category': 'AI'
|
| 1529 |
-
},
|
| 1530 |
-
{
|
| 1531 |
-
'title': '์๋ผ, ์ฑGPT๋ณด๋ค ๋นจ๋ฆฌ 100๋ง ๋ค์ด๋ก๋ ๋ํ',
|
| 1532 |
-
'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045',
|
| 1533 |
-
'date': '10-10 12:55',
|
| 1534 |
-
'source': 'AI Times',
|
| 1535 |
-
'category': 'AI'
|
| 1536 |
-
}
|
| 1537 |
-
]
|
| 1538 |
-
for sample in sample_news:
|
| 1539 |
-
if sample['url'] not in seen_urls:
|
| 1540 |
-
unique_news.append(sample)
|
| 1541 |
-
|
| 1542 |
-
return unique_news[:20] # ์ต๋ 20๊ฐ
|
| 1543 |
-
|
| 1544 |
def analyze_all_news(self) -> List[Dict]:
|
| 1545 |
"""๋ชจ๋ ๋ด์ค์ LLM ๋ถ์ ์ถ๊ฐ"""
|
| 1546 |
print("๐ฐ ๋ด์ค LLM ๋ถ์ ์์...")
|
|
@@ -1804,5 +1803,5 @@ if __name__ == '__main__':
|
|
| 1804 |
print("\n\n๐ ์๋ฒ ์ข
๋ฃ!")
|
| 1805 |
sys.exit(0)
|
| 1806 |
except Exception as e:
|
| 1807 |
-
print(f"\n
|
| 1808 |
sys.exit(1)
|
|
|
|
| 35 |
import time
|
| 36 |
from huggingface_hub import HfApi
|
| 37 |
from bs4 import BeautifulSoup
|
| 38 |
+
import re
|
| 39 |
|
| 40 |
# Flask 앱 초기화
|
| 41 |
app = Flask(__name__)
|
|
|
|
| 1283 |
}
|
| 1284 |
self.news_data = []
|
| 1285 |
|
| 1286 |
+
def fetch_aitimes_news(self) -> List[Dict]:
|
| 1287 |
+
"""AI Times์์ ์ค๋ ๋ ์ง ๋ด์ค ํฌ๋กค๋ง"""
|
| 1288 |
+
print("๐ฐ AI Times ๋ด์ค ์์ง ์ค...")
|
| 1289 |
+
|
| 1290 |
+
# 수집할 URL 목록
|
| 1291 |
+
urls = [
|
| 1292 |
+
'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
|
| 1293 |
+
'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm'
|
| 1294 |
+
]
|
| 1295 |
+
|
| 1296 |
+
all_news = []
|
| 1297 |
+
today = datetime.now().strftime('%m-%d') # ์: '10-10'
|
| 1298 |
+
|
| 1299 |
+
for url_idx, url in enumerate(urls, 1):
|
| 1300 |
+
try:
|
| 1301 |
+
print(f" ๐ [{url_idx}/2] ์์ง ์ค: {url}")
|
| 1302 |
+
response = requests.get(url, timeout=15, headers={
|
| 1303 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
| 1304 |
+
})
|
| 1305 |
+
response.raise_for_status()
|
| 1306 |
+
response.encoding = 'utf-8'
|
| 1307 |
+
|
| 1308 |
+
text = response.text
|
| 1309 |
+
|
| 1310 |
+
# 패턴: [제목](링크)...날짜
|
| 1311 |
+
# ์: [MS "๊ธ์ฆํ๋ '์ฑGPT' ์์๋ก ๋ฐ์ดํฐ์ผํฐ ๋ถ์กฑ...2026๋
๊น์ง ์ง์๋ ๋ฏ"](https://www.aitimes.com/news/articleView.html?idxno=203055)
|
| 1312 |
+
# ...
|
| 1313 |
+
# 산업일반박찬 기자10-10 15:10
|
| 1314 |
+
|
| 1315 |
+
# 제목과 링크 매칭 패턴
|
| 1316 |
+
pattern = r'\[([^\]]+)\]\((https://www\.aitimes\.com/news/articleView\.html\?idxno=\d+)\)'
|
| 1317 |
+
|
| 1318 |
+
matches = re.finditer(pattern, text)
|
| 1319 |
+
|
| 1320 |
+
articles_found = 0
|
| 1321 |
+
for match in matches:
|
| 1322 |
+
try:
|
| 1323 |
+
title = match.group(1).strip()
|
| 1324 |
+
link = match.group(2).strip()
|
| 1325 |
+
|
| 1326 |
+
# 제목이 너무 짧으면 스킵
|
| 1327 |
+
if len(title) < 10:
|
| 1328 |
+
continue
|
| 1329 |
+
|
| 1330 |
+
# 해당 기사의 날짜 찾기 (링크 다음에 100자 이내)
|
| 1331 |
+
pos = match.end()
|
| 1332 |
+
nearby_text = text[pos:pos+200]
|
| 1333 |
+
|
| 1334 |
+
# 날짜 패턴: 10-10 15:10 형식
|
| 1335 |
+
date_pattern = r'(\d{2}-\d{2}\s+\d{2}:\d{2})'
|
| 1336 |
+
date_match = re.search(date_pattern, nearby_text)
|
| 1337 |
+
|
| 1338 |
+
date_text = date_match.group(1) if date_match else today
|
| 1339 |
+
|
| 1340 |
+
# 오늘 날짜만 필터링
|
| 1341 |
+
if today not in date_text:
|
| 1342 |
+
continue
|
| 1343 |
+
|
| 1344 |
+
news_item = {
|
| 1345 |
+
'title': title,
|
| 1346 |
+
'url': link,
|
| 1347 |
+
'date': date_text,
|
| 1348 |
+
'source': 'AI Times',
|
| 1349 |
+
'category': 'AI'
|
| 1350 |
+
}
|
| 1351 |
+
|
| 1352 |
+
all_news.append(news_item)
|
| 1353 |
+
articles_found += 1
|
| 1354 |
+
|
| 1355 |
+
print(f" โ ์ถ๊ฐ: {title[:60]}... ({date_text})")
|
| 1356 |
+
|
| 1357 |
+
except Exception as e:
|
| 1358 |
+
continue
|
| 1359 |
+
|
| 1360 |
+
print(f" โ {articles_found}๊ฐ ์ค๋์ ๊ธฐ์ฌ ๋ฐ๊ฒฌ\n")
|
| 1361 |
+
time.sleep(1) # ์๋ฒ ๋ถํ ๋ฐฉ์ง
|
| 1362 |
+
|
| 1363 |
+
except Exception as e:
|
| 1364 |
+
print(f" โ ๏ธ URL ์์ง ์ค๋ฅ: {e}\n")
|
| 1365 |
+
continue
|
| 1366 |
+
|
| 1367 |
+
# 중복 제거 (URL 기준)
|
| 1368 |
+
unique_news = []
|
| 1369 |
+
seen_urls = set()
|
| 1370 |
+
for news in all_news:
|
| 1371 |
+
if news['url'] not in seen_urls:
|
| 1372 |
+
unique_news.append(news)
|
| 1373 |
+
seen_urls.add(news['url'])
|
| 1374 |
+
|
| 1375 |
+
print(f"โ
์ด {len(unique_news)}๊ฐ ์ค๋ณต ์ ๊ฑฐ๋ ์ค๋์ ๋ด์ค\n")
|
| 1376 |
+
|
| 1377 |
+
# 최소 3개는 보장 (없으면 샘플 추가)
|
| 1378 |
+
if len(unique_news) < 3:
|
| 1379 |
+
print("โ ๏ธ ๋ด์ค๊ฐ ๋ถ์กฑํ์ฌ ์ต๊ทผ ์ํ ์ถ๊ฐ\n")
|
| 1380 |
+
sample_news = [
|
| 1381 |
+
{
|
| 1382 |
+
'title': 'MS "์ฑGPT ์์ ํญ์ฆ์ผ๋ก ๋ฐ์ดํฐ์ผํฐ ๋ถ์กฑ...2026๋
๊น์ง ์ง์"',
|
| 1383 |
+
'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055',
|
| 1384 |
+
'date': '10-10 15:10',
|
| 1385 |
+
'source': 'AI Times',
|
| 1386 |
+
'category': 'AI'
|
| 1387 |
+
},
|
| 1388 |
+
{
|
| 1389 |
+
'title': '๋ฏธ๊ตญ, UAE์ GPU ํ๋งค ์ผ๋ถ ์น์ธ...์๋น๋์ ์์ด 5์กฐ๋ฌ๋ฌ ๋์',
|
| 1390 |
+
'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053',
|
| 1391 |
+
'date': '10-10 14:46',
|
| 1392 |
+
'source': 'AI Times',
|
| 1393 |
+
'category': 'AI'
|
| 1394 |
+
},
|
| 1395 |
+
{
|
| 1396 |
+
'title': '์๋ผ, ์ฑGPT๋ณด๋ค ๋นจ๋ฆฌ 100๋ง ๋ค์ด๋ก๋ ๋ํ',
|
| 1397 |
+
'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045',
|
| 1398 |
+
'date': '10-10 12:55',
|
| 1399 |
+
'source': 'AI Times',
|
| 1400 |
+
'category': 'AI'
|
| 1401 |
+
}
|
| 1402 |
+
]
|
| 1403 |
+
for sample in sample_news:
|
| 1404 |
+
if sample['url'] not in seen_urls:
|
| 1405 |
+
unique_news.append(sample)
|
| 1406 |
+
|
| 1407 |
+
return unique_news[:20] # ์ต๋ 20๊ฐ
|
| 1408 |
+
|
| 1409 |
def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]:
|
| 1410 |
"""ํ๊น
ํ์ด์ค ํธ๋ ๋ฉ ๋ชจ๋ธ 30๊ฐ ์์ง (์ค์ API)"""
|
| 1411 |
print(f"๐ค ํ๊น
ํ์ด์ค ํธ๋ ๋ฉ ๋ชจ๋ธ {limit}๊ฐ ์์ง ์ค...")
|
|
|
|
| 1540 |
print("๐พ DB์์ ์ด์ ๋ฐ์ดํฐ ๋ก๋ ์๋...")
|
| 1541 |
return load_spaces_from_db()
|
| 1542 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1543 |
def analyze_all_news(self) -> List[Dict]:
|
| 1544 |
"""๋ชจ๋ ๋ด์ค์ LLM ๋ถ์ ์ถ๊ฐ"""
|
| 1545 |
print("๐ฐ ๋ด์ค LLM ๋ถ์ ์์...")
|
|
|
|
| 1803 |
print("\n\n๐ ์๋ฒ ์ข
๋ฃ!")
|
| 1804 |
sys.exit(0)
|
| 1805 |
except Exception as e:
|
| 1806 |
+
print(f"\nโ์๋ฒ ์ค๋ฅ: {e}")
|
| 1807 |
sys.exit(1)
|