# -*- coding: utf-8 -*- from flask import Flask, render_template_string, jsonify, request import requests import json from datetime import datetime from typing import List, Dict, Optional import os import sys import sqlite3 import time from huggingface_hub import HfApi from bs4 import BeautifulSoup import re # Flask 앱 초기화 app = Flask(__name__) app.config['JSON_AS_ASCII'] = False # 데이터베이스 파일 경로 DB_PATH = 'ai_news_analysis.db' # ============================================ # HTML 템플릿 (탭 UI 포함) # ============================================ HTML_TEMPLATE = """ AI 뉴스 & 허깅페이스 LLM 분석 시스템

🤖 AI 뉴스 & 허깅페이스 LLM 분석

AI 트렌드 분석 시스템 🎓

{{ stats.total_news }}

📰 분석된 뉴스

{{ stats.hf_models }}

🤗 트렌딩 모델

{{ stats.hf_spaces }}

🚀 인기 스페이스

{{ stats.llm_analyses }}

🧠 LLM 분석

{% for article in analyzed_news %}

{{ loop.index }}. {{ article.title }}

📅 {{ article.date }} 📰 {{ article.source }}

🎯 쉬운 요약

{{ article.analysis.summary }}

💡 왜 중요할까?

{{ article.analysis.significance }}

📊 영향도 {{ article.analysis.impact_text }}

{{ article.analysis.impact_description }}

✅ 우리가 할 수 있는 것

{{ article.analysis.action }}

🔗 전체 기사 읽어보기

{% endfor %}

{% for model in analyzed_models %}

{{ model.rank }}

{{ model.name }}

🏷️ {{ model.task }}

📥 다운로드
{{ "{:,}".format(model.downloads) }}

❤️ 좋아요
{{ "{:,}".format(model.likes) }}

🧠 AI 분석:
{{ model.analysis }}

🔗 모델 페이지 방문

{% endfor %}

{% if analyzed_models|length == 0 %}

⚠️ 모델 데이터를 불러오는 중...

{% endif %}

{% for space in analyzed_spaces %}

{{ space.rank }}. {{ space.name }}

트렌딩 {{ space.rank }}위

📝 설명: {{ space.description }}

🎓 쉬운 설명:
{{ space.simple_explanation }}

{% if space.tech_stack %}

🛠️ 사용 기술: {% for tech in space.tech_stack %} {{ tech }} {% endfor %}

{% endif %} 🔗 스페이스 체험하기

{% endfor %} {% if analyzed_spaces|length == 0 %}

⚠️ 스페이스 데이터를 불러오는 중...

{% endif %}

⏰ 마지막 업데이트: {{ timestamp }}

# ============================================
# Database initialization and news persistence
# ============================================
def init_database(db_path: Optional[str] = None):
    """Create the SQLite tables used by the app if they do not exist yet.

    Three tables are created: ``news`` (unique on ``url``), ``models``
    (unique on ``name``) and ``spaces`` (unique on ``space_id``).

    Args:
        db_path: Database file to initialise. Defaults to the module-level
            ``DB_PATH``.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        cursor = conn.cursor()

        # News table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS news (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT NOT NULL,
                url TEXT NOT NULL UNIQUE,
                date TEXT,
                source TEXT,
                category TEXT,
                summary TEXT,
                significance TEXT,
                impact_level TEXT,
                impact_text TEXT,
                impact_description TEXT,
                action TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Models table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS models (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL UNIQUE,
                downloads INTEGER,
                likes INTEGER,
                task TEXT,
                url TEXT,
                analysis TEXT,
                rank INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Spaces table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS spaces (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                space_id TEXT NOT NULL UNIQUE,
                name TEXT NOT NULL,
                author TEXT,
                title TEXT,
                likes INTEGER,
                url TEXT,
                sdk TEXT,
                simple_explanation TEXT,
                tech_stack TEXT,
                rank INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        conn.commit()
    finally:
        # Always release the handle, even when a CREATE statement fails.
        conn.close()
    print("✅ 데이터베이스 초기화 완료")


def save_news_to_db(news_list: List[Dict], db_path: Optional[str] = None):
    """Persist analysed news articles, replacing rows that share a URL.

    Each item needs ``title``, ``url`` and an ``analysis`` dict with the keys
    ``summary``, ``significance``, ``impact_level``, ``impact_text``,
    ``impact_description`` and ``action``. A malformed item is reported and
    skipped instead of aborting the whole batch.

    Args:
        news_list: Articles to store.
        db_path: Database file to write to. Defaults to ``DB_PATH``.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    saved_count = 0
    try:
        cursor = conn.cursor()
        for news in news_list:
            try:
                analysis = news['analysis']
                cursor.execute('''
                    INSERT OR REPLACE INTO news
                    (title, url, date, source, category, summary, significance,
                     impact_level, impact_text, impact_description, action)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    news['title'],
                    news['url'],
                    news.get('date', ''),
                    news.get('source', ''),
                    news.get('category', ''),
                    analysis['summary'],
                    analysis['significance'],
                    analysis['impact_level'],
                    analysis['impact_text'],
                    analysis['impact_description'],
                    analysis['action'],
                ))
                saved_count += 1
            except (sqlite3.IntegrityError, KeyError, TypeError) as e:
                # NOT NULL violations and incomplete items only skip this row.
                print(f"⚠️ 뉴스 저장 오류: {e}")
        conn.commit()
    finally:
        conn.close()
    print(f"✅ {saved_count}개 뉴스 DB 저장 완료")
# ============================================
# Model / space persistence and DB loaders
# ============================================
def _open_db(db_path: Optional[str] = None):
    """Open a connection to *db_path*, or to the module-level DB_PATH."""
    return sqlite3.connect(DB_PATH if db_path is None else db_path)


def _prune_stale_rows(cursor, table: str, key_column: str, keep: List[str]):
    """Delete rows of *table* whose *key_column* is not listed in *keep*.

    ``table`` and ``key_column`` are internal constants, never user input;
    only the key values are bound as parameters. The trending lists are at
    most a few dozen entries, well below SQLite's bound-parameter limit.
    """
    placeholders = ",".join("?" * len(keep))
    cursor.execute(
        f"DELETE FROM {table} WHERE {key_column} NOT IN ({placeholders})",
        keep,
    )


def save_models_to_db(models_list: List[Dict], db_path: Optional[str] = None):
    """Store the current trending-model snapshot.

    Rows are upserted by ``name``. Models that are no longer part of the
    snapshot are removed, otherwise their old ``rank`` values would collide
    with the new ranking when the table is read back ordered by rank. An
    empty or fully failed batch leaves existing rows untouched.

    Args:
        models_list: Dicts with ``name``, ``downloads``, ``likes``, ``task``,
            ``url``, ``analysis`` and ``rank``.
        db_path: Database file to write to. Defaults to ``DB_PATH``.
    """
    conn = _open_db(db_path)
    saved_names = []
    try:
        cursor = conn.cursor()
        for model in models_list:
            try:
                cursor.execute('''
                    INSERT OR REPLACE INTO models
                    (name, downloads, likes, task, url, analysis, rank, updated_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
                ''', (
                    model['name'],
                    model['downloads'],
                    model['likes'],
                    model['task'],
                    model['url'],
                    model['analysis'],
                    model['rank'],
                ))
                saved_names.append(model['name'])
            except (sqlite3.Error, KeyError, TypeError) as e:
                print(f"⚠️ 모델 저장 오류: {e}")
        if saved_names:
            _prune_stale_rows(cursor, "models", "name", saved_names)
        conn.commit()
    finally:
        conn.close()
    print(f"✅ {len(saved_names)}개 모델 DB 저장 완료")


def save_spaces_to_db(spaces_list: List[Dict], db_path: Optional[str] = None):
    """Store the current trending-space snapshot.

    Rows are upserted by ``space_id``; ``tech_stack`` is serialised as JSON.
    Spaces missing from the new snapshot are removed so that stale ranks do
    not mix with current ones. An empty or fully failed batch leaves existing
    rows untouched.

    Args:
        spaces_list: Dicts with at least ``space_id``, ``name``, ``url``,
            ``simple_explanation`` and ``rank``.
        db_path: Database file to write to. Defaults to ``DB_PATH``.
    """
    conn = _open_db(db_path)
    saved_ids = []
    try:
        cursor = conn.cursor()
        for space in spaces_list:
            try:
                cursor.execute('''
                    INSERT OR REPLACE INTO spaces
                    (space_id, name, author, title, likes, url, sdk,
                     simple_explanation, tech_stack, rank, updated_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
                ''', (
                    space['space_id'],
                    space['name'],
                    space.get('author', ''),
                    space.get('title', ''),
                    space.get('likes', 0),
                    space['url'],
                    space.get('sdk', ''),
                    space['simple_explanation'],
                    json.dumps(space.get('tech_stack', [])),
                    space['rank'],
                ))
                saved_ids.append(space['space_id'])
            except (sqlite3.Error, KeyError, TypeError) as e:
                print(f"⚠️ 스페이스 저장 오류: {e}")
        if saved_ids:
            _prune_stale_rows(cursor, "spaces", "space_id", saved_ids)
        conn.commit()
    finally:
        conn.close()
    print(f"✅ {len(saved_ids)}개 스페이스 DB 저장 완료")


def load_news_from_db(db_path: Optional[str] = None) -> List[Dict]:
    """Return up to 50 stored news articles, newest first.

    ``created_at`` only has one-second resolution, so ``id`` is used as a
    tie-breaker to keep the insertion order of a batch stable.

    Args:
        db_path: Database file to read. Defaults to ``DB_PATH``.

    Returns:
        Article dicts in the same shape that ``save_news_to_db`` accepts
        (flat fields plus a nested ``analysis`` dict).
    """
    conn = _open_db(db_path)
    try:
        rows = conn.execute('''
            SELECT title, url, date, source, category, summary, significance,
                   impact_level, impact_text, impact_description, action
            FROM news
            ORDER BY created_at DESC, id ASC
            LIMIT 50
        ''').fetchall()
    finally:
        conn.close()

    news_list = []
    for (title, url, date, source, category, summary, significance,
         impact_level, impact_text, impact_description, action) in rows:
        news_list.append({
            'title': title,
            'url': url,
            'date': date,
            'source': source,
            'category': category,
            'analysis': {
                'summary': summary,
                'significance': significance,
                'impact_level': impact_level,
                'impact_text': impact_text,
                'impact_description': impact_description,
                'action': action,
            },
        })
    return news_list


def load_models_from_db(db_path: Optional[str] = None) -> List[Dict]:
    """Return up to 30 stored models ordered by rank.

    Args:
        db_path: Database file to read. Defaults to ``DB_PATH``.
    """
    columns = ('name', 'downloads', 'likes', 'task', 'url', 'analysis', 'rank')
    conn = _open_db(db_path)
    try:
        rows = conn.execute('''
            SELECT name, downloads, likes, task, url, analysis, rank
            FROM models
            ORDER BY rank ASC
            LIMIT 30
        ''').fetchall()
    finally:
        conn.close()
    return [dict(zip(columns, row)) for row in rows]


def load_spaces_from_db(db_path: Optional[str] = None) -> List[Dict]:
    """Return up to 30 stored spaces ordered by rank.

    ``tech_stack`` is decoded from JSON; an empty or undecodable value
    becomes ``[]``. The stored ``title`` doubles as ``description`` because
    the template renders ``space.description``.

    Args:
        db_path: Database file to read. Defaults to ``DB_PATH``.
    """
    conn = _open_db(db_path)
    try:
        rows = conn.execute('''
            SELECT space_id, name, author, title, likes, url, sdk,
                   simple_explanation, tech_stack, rank
            FROM spaces
            ORDER BY rank ASC
            LIMIT 30
        ''').fetchall()
    finally:
        conn.close()

    spaces_list = []
    for (space_id, name, author, title, likes, url, sdk,
         simple_explanation, tech_stack_json, rank) in rows:
        try:
            tech_stack = json.loads(tech_stack_json) if tech_stack_json else []
        except (ValueError, TypeError):
            # A corrupt column must not make the whole page fail.
            tech_stack = []
        spaces_list.append({
            'space_id': space_id,
            'name': name,
            'author': author,
            'title': title,
            'likes': likes,
            'url': url,
            'sdk': sdk,
            'simple_explanation': simple_explanation,
            'tech_stack': tech_stack,
            'rank': rank,
            'description': title,
        })
    return spaces_list
# ============================================
# LLM analyzer
# ============================================
class LLMAnalyzer:
    """Explain news, models and spaces in plain Korean for students.

    Uses the Fireworks AI chat-completions endpoint when the
    ``FIREWORKS_API_KEY`` environment variable is set and falls back to
    canned template text otherwise.
    """

    _MODEL_NAME = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"

    # Substrings looked for in downloaded space code, paired with the label
    # shown in the UI. 'torch' also matches 'pytorch'.
    _TECH_MARKERS = (
        ('gradio', 'Gradio'),
        ('streamlit', 'Streamlit'),
        ('transformers', 'Transformers'),
        ('torch', 'PyTorch'),
        ('tensorflow', 'TensorFlow'),
        ('diffusers', 'Diffusers'),
    )

    # Wording used by the template fallback of analyze_model().
    _TASK_EXPLANATIONS = {
        "text-generation": "글을 자동으로 만들어주는",
        "image-to-text": "사진을 보고 설명을 써주는",
        "text-to-image": "글을 읽고 그림을 그려주는",
        "translation": "다른 언어로 번역해주는",
        "question-answering": "질문에 답해주는",
        "summarization": "긴 글을 짧게 요약해주는",
        "text-classification": "글을 분류해주는",
        "token-classification": "단어를 분석해주는",
        "fill-mask": "빈칸을 채워주는",
    }

    # Canned analyses keyed by a keyword expected in the article title.
    _NEWS_TEMPLATES = {
        "챗GPT": {
            "summary": "마이크로소프트(MS)는 챗GPT의 폭발적인 사용량 증가로 인해 데이터센터 용량이 부족한 상황에 직면했습니다. 현재 미국 내 여러 지역에서 물리적 공간과 서버가 모두 부족한 상태이며, 이로 인해 버지니아와 텍사스 등 핵심 지역에서는 2026년 상반기까지 신규 Azure 클라우드 구독이 제한될 것으로 예상됩니다. 이는 생성형 AI 서비스의 급격한 성장이 가져온 인프라 공급 문제를 여실히 보여주는 사례입니다.",
            "significance": "이 뉴스는 AI 기술의 대중화 속도가 기업들의 예상을 훨씬 뛰어넘고 있음을 보여줍니다. MS 같은 글로벌 IT 기업도 AI 수요를 따라잡기 위해 고군분투하고 있으며, 이는 AI가 단순한 유행이 아닌 산업 전반을 변화시키는 핵심 기술임을 증명합니다.",
            "impact_level": "high",
            "impact_text": "높음",
            "impact_description": "클라우드 인프라 부족은 AI 서비스 확장에 직접적인 영향을 미치며, 향후 AI 기술 접근성과 비용 구조를 변화시킬 수 있습니다.",
            "action": "챗GPT나 Claude 같은 AI 도구를 활용한 학습 방법을 익히세요. 보고서 작성, 코딩 학습, 외국어 공부 등 다양한 분야에서 AI를 학습 보조 도구로 사용할 수 있습니다.",
        },
        "GPU": {
            "summary": "미국 정부가 아랍에미리트(UAE)에 최첨단 AI 칩(GPU) 수출을 승인했습니다. 이번 승인은 UAE 내 미국 기업이 운영하는 데이터센터에 한정되며, 오픈AI 전용 5GW 규모 데이터센터 구축에 사용될 예정입니다. GPU는 AI 모델 학습에 필수적인 하드웨어로, 엔비디아가 시장을 주도하고 있으며 이번 결정으로 엔비디아의 시가총액이 5조 달러에 근접할 것으로 전망됩니다.",
            "significance": "이는 미국의 AI 기술 수출 정책 변화를 보여주는 중요한 신호입니다. 기술 패권 경쟁 속에서도 전략적 동맹국과의 협력을 통해 AI 생태계를 확장하려는 미국의 의도를 엿볼 수 있습니다.",
            "impact_level": "medium",
            "impact_text": "중간",
            "impact_description": "AI 하드웨어 공급망의 지정학적 변화는 글로벌 AI 산업 지형도에 영향을 미칠 수 있으며, 특히 반도체 산업과 국제 관계에 중요한 의미를 가집니다.",
            "action": "컴퓨터 하드웨어, 특히 GPU의 작동 원리와 AI 학습에서의 역할을 공부해보세요. 병렬 처리, 행렬 연산 등의 개념을 이해하면 AI 기술의 근간을 파악할 수 있습니다.",
        },
        "소라": {
            "summary": "오픈AI의 AI 동영상 생성 앱 '소라(Sora)'가 출시 5일 만에 100만 다운로드를 돌파했습니다. 이는 챗GPT보다 빠른 성장 속도이며, 초대 전용(invite-only) 앱임을 고려하면 더욱 놀라운 기록입니다. 소라는 텍스트 프롬프트만으로 고품질 동영상을 생성할 수 있는 생성형 AI 도구로, 미국과 캐나다에서 iOS 전용으로 출시되었습니다.",
            "significance": "텍스트를 이미지로 변환하는 기술에서 더 나아가 동영상 생성까지 가능해진 것은 AI 기술의 진화를 보여줍니다. 콘텐츠 제작의 민주화가 가속화되고 있으며, 누구나 쉽게 고품질 영상을 만들 수 있는 시대가 열리고 있습니다.",
            "impact_level": "high",
            "impact_text": "높음",
            "impact_description": "영상 제작 산업의 패러다임이 변화하고 있으며, 교육, 마케팅, 엔터테인먼트 등 다양한 분야에서 AI 동영상 생성 기술의 활용이 증가할 것으로 예상됩니다.",
            "action": "AI 동영상 생성 도구의 가능성과 한계를 탐구해보세요. 창의적인 아이디어를 시각화하는 방법을 배우고, 동시에 딥페이크 같은 악용 사례에 대한 비판적 사고도 함양하세요.",
        },
    }

    def __init__(self):
        """Read the API key from the environment and note whether LLM calls are possible."""
        self.api_key = os.environ.get('FIREWORKS_API_KEY', '')
        self.api_url = "https://api.fireworks.ai/inference/v1/chat/completions"
        self.api_available = bool(self.api_key)
        if not self.api_available:
            print("⚠️ FIREWORKS_API_KEY 환경변수가 설정되지 않았습니다. 템플릿 모드로 동작합니다.")

    def call_llm(self, messages: List[Dict], max_tokens: int = 2000) -> Optional[str]:
        """Send *messages* to the Fireworks chat-completions API.

        Args:
            messages: Chat messages (``role`` / ``content`` dicts).
            max_tokens: Upper bound for the generated answer.

        Returns:
            The text of the first choice, or ``None`` when no API key is
            configured or the request fails for any reason.
        """
        if not self.api_available:
            return None
        payload = {
            "model": self._MODEL_NAME,
            "max_tokens": max_tokens,
            "top_p": 1,
            "top_k": 40,
            "presence_penalty": 0,
            "frequency_penalty": 0,
            "temperature": 0.6,
            "messages": messages,
        }
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        try:
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=30)
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Best effort: callers fall back to template text on None.
            print(f"  ⚠️ LLM API 호출 오류: {e}")
            return None

    def _fetch_text(self, url: str, max_chars: int, error_label: str) -> Optional[str]:
        """Download *url* and return its text, truncated to *max_chars* characters.

        Returns ``None`` on a non-200 response or on any request error.
        """
        try:
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                return None
            content = response.text
            if len(content) > max_chars:
                # Keep prompts small; the marker tells the LLM text was cut.
                content = content[:max_chars] + "\n...(후략)"
            return content
        except Exception as e:
            print(f"  ⚠️ {error_label} 가져오기 오류: {e}")
            return None

    def fetch_model_card(self, model_id: str) -> Optional[str]:
        """Return the first ~3000 characters of a model's README.md, or ``None``."""
        url = f"https://huggingface.co/{model_id}/raw/main/README.md"
        return self._fetch_text(url, 3000, "모델 카드")

    def fetch_space_code(self, space_id: str) -> Optional[str]:
        """Return the first ~2000 characters of a space's app.py, or ``None``."""
        url = f"https://huggingface.co/spaces/{space_id}/raw/main/app.py"
        return self._fetch_text(url, 2000, "스페이스 코드")

    def analyze_news_simple(self, title: str, content: str = "") -> Dict:
        """Return a student-level analysis dict for a news title.

        The canned template whose keyword occurs *earliest* in the title is
        used, so "소라, 챗GPT보다 ..." is treated as a Sora article rather than
        a ChatGPT one. Titles without a known keyword get a generic analysis.
        ``content`` is accepted for interface compatibility but not used.

        Returns:
            A fresh dict with the keys ``summary``, ``significance``,
            ``impact_level``, ``impact_text``, ``impact_description`` and
            ``action``.
        """
        lowered = title.lower()
        hits = [
            (lowered.find(keyword.lower()), keyword)
            for keyword in self._NEWS_TEMPLATES
            if keyword.lower() in lowered
        ]
        if hits:
            _, keyword = min(hits, key=lambda hit: hit[0])
            # Copy so callers can modify the result without touching the table.
            return dict(self._NEWS_TEMPLATES[keyword])

        return {
            "summary": f"'{title}'와 관련된 최신 AI 기술 동향입니다. 인공지능 분야는 빠르게 발전하고 있으며, 이러한 기술 변화는 우리의 일상생활과 미래 직업 세계에 큰 영향을 미칠 것으로 예상됩니다. 관련 기술의 원리와 사회적 파급효과를 함께 이해하는 것이 중요합니다.",
            "significance": "AI 기술의 발전은 단순한 기술 혁신을 넘어 사회, 경제, 윤리적 측면에서 다양한 논의를 불러일으키고 있습니다. 이러한 변화를 이해하고 대비하는 것이 미래 세대에게 중요한 역량입니다.",
            "impact_level": "medium",
            "impact_text": "중간",
            "impact_description": "AI 기술의 발전은 교육, 취업, 산업 전반에 걸쳐 구조적 변화를 가져올 것이며, 이에 대한 이해와 준비가 필요합니다.",
            "action": "AI 기술의 기본 원리를 학습하고, 관련 프로그래밍(Python 등)이나 데이터 과학 기초를 공부해보세요. 또한 AI 윤리와 사회적 영향에 대해서도 비판적으로 사고하는 습관을 기르세요.",
        }

    def analyze_model(self, model_name: str, task: str, downloads: int) -> str:
        """Describe a Hugging Face model in three or four easy sentences.

        The model card is downloaded and summarised by the LLM when possible;
        otherwise a sentence is built from the task tag and download count.
        """
        model_card = self.fetch_model_card(model_name)

        if model_card and self.api_available:
            prompt = (
                f"다음은 허깅페이스 모델 '{model_name}'의 모델 카드입니다:\n\n"
                f"{model_card}\n\n"
                "이 모델을 중고등학생이 이해할 수 있도록 3-4문장으로 쉽게 설명해주세요. "
                "다음 내용을 포함하세요:\n"
                "1. 이 모델이 무엇을 하는지\n"
                "2. 어떤 특징이 있는지\n"
                "3. 누가 사용하면 좋은지\n\n"
                "답변은 반드시 3-4문장의 한국어로만 작성하세요."
            )
            messages = [
                {
                    "role": "system",
                    "content": "당신은 중고등학생도 이해할 수 있게 AI 모델을 쉽게 설명하는 전문가입니다. 한국어로 답변하세요.",
                },
                {"role": "user", "content": prompt},
            ]
            result = self.call_llm(messages, max_tokens=500)
            if result:
                return result.strip()

        # Fallback: template-based description.
        task_desc = self._TASK_EXPLANATIONS.get(task, "특별한 기능을 하는")
        if downloads > 10000000:
            popularity = "엄청나게 많은"
        elif downloads > 1000000:
            popularity = "아주 많은"
        elif downloads > 100000:
            popularity = "많은"
        else:
            popularity = "어느 정도"
        short_name = model_name.split('/')[-1]
        return f"이 모델은 {task_desc} AI예요. {popularity} 사람들이 다운로드해서 사용하고 있어요. {short_name}라는 이름으로 유명해요!"

    @classmethod
    def _detect_tech_stack(cls, app_code: str) -> List[str]:
        """List the known frameworks mentioned in *app_code* (case-insensitive)."""
        lowered = app_code.lower()
        found = [label for marker, label in cls._TECH_MARKERS if marker in lowered]
        return found or ['Python', 'AI']

    def analyze_space(self, space_name: str, space_id: str, description: str) -> Dict:
        """Describe a Hugging Face space and guess the frameworks it uses.

        ``description`` is accepted for interface compatibility but not used.

        Returns:
            ``{"simple_explanation": str, "tech_stack": list[str]}``. The
            explanation comes from the LLM when app.py could be downloaded and
            the API is available, otherwise from a fixed template. The tech
            stack is derived from the downloaded code whenever there is any;
            only without code is a generic default list returned.
        """
        app_code = self.fetch_space_code(space_id)

        if app_code and self.api_available:
            prompt = (
                f"다음은 허깅페이스 스페이스 '{space_name}'의 app.py 코드입니다:\n\n"
                f"{app_code}\n\n"
                "이 앱을 중고등학생이 이해할 수 있도록 3-4문장으로 쉽게 설명해주세요. "
                "다음 내용을 포함하세요:\n"
                "1. 이 앱이 무엇을 하는지\n"
                "2. 어떤 기술을 사용하는지\n"
                "3. 어떻게 활용할 수 있는지\n\n"
                "답변은 반드시 3-4문장의 한국어로만 작성하세요."
            )
            messages = [
                {
                    "role": "system",
                    "content": "당신은 중고등학생도 이해할 수 있게 AI 애플리케이션을 쉽게 설명하는 전문가입니다. 한국어로 답변하세요.",
                },
                {"role": "user", "content": prompt},
            ]
            result = self.call_llm(messages, max_tokens=500)
            if result:
                return {
                    "simple_explanation": result.strip(),
                    "tech_stack": self._detect_tech_stack(app_code),
                }

        # Fallback: template text; still report what the code shows, if any.
        if app_code:
            tech_stack = self._detect_tech_stack(app_code)
        else:
            tech_stack = ["Python", "Gradio", "Transformers", "PyTorch"]
        return {
            "simple_explanation": f"{space_name}는 웹브라우저에서 바로 AI를 체험해볼 수 있는 곳이에요. 설치 없이도 사용할 수 있어서 편리해요! 마치 온라인 게임처럼 바로 접속해서 AI를 사용할 수 있답니다.",
            "tech_stack": tech_stack,
        }
# ============================================
# Advanced analyzer
# ============================================
class AdvancedAIAnalyzer:
    """Collect AI Times news and Hugging Face trending items and analyse them."""

    _AITIMES_BASE = 'https://www.aitimes.com'
    _AITIMES_LIST_URLS = (
        'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
        'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm',
    )
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    # Compiled once; both patterns are applied to every link on a list page.
    _ARTICLE_HREF_RE = re.compile(r'/news/articleView\.html\?idxno=\d+')
    _DATE_RE = re.compile(r'(\d{2}-\d{2}\s+\d{2}:\d{2})')
    _MIN_TITLE_LENGTH = 10
    _MIN_NEWS_COUNT = 3
    _MAX_NEWS_COUNT = 20
    # Padding used when fewer than _MIN_NEWS_COUNT articles were scraped.
    _SAMPLE_NEWS = (
        {
            'title': 'MS "챗GPT 수요 폭증으로 데이터센터 부족...2026년까지 지속"',
            'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055',
            'date': '10-10 15:10',
            'source': 'AI Times',
            'category': 'AI',
        },
        {
            'title': '미국, UAE에 GPU 판매 일부 승인...엔비디아 시총 5조달러 눈앞',
            'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053',
            'date': '10-10 14:46',
            'source': 'AI Times',
            'category': 'AI',
        },
        {
            'title': '소라, 챗GPT보다 빨리 100만 다운로드 돌파',
            'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045',
            'date': '10-10 12:55',
            'source': 'AI Times',
            'category': 'AI',
        },
    )

    def __init__(self):
        """Create the LLM analyzer and empty result holders."""
        self.llm_analyzer = LLMAnalyzer()
        self.huggingface_data = {
            "models": [],
            "spaces": [],
        }
        self.news_data = []

    @classmethod
    def _absolute_url(cls, link: str) -> str:
        """Turn a site-relative link into an absolute AI Times URL."""
        if not link or link.startswith('http'):
            return link
        if link.startswith('/'):
            return cls._AITIMES_BASE + link
        return cls._AITIMES_BASE + '/' + link

    @classmethod
    def _find_date(cls, article_tag) -> str:
        """Return the 'MM-DD HH:MM' stamp found near *article_tag*, or ''.

        The parent element's text is searched first, then the following
        siblings of the link.
        """
        parent = article_tag.parent
        if parent:
            match = cls._DATE_RE.search(parent.get_text())
            if match:
                return match.group(1)
        for sibling in article_tag.find_next_siblings():
            match = cls._DATE_RE.search(sibling.get_text())
            if match:
                return match.group(1)
        return ''

    @classmethod
    def _parse_article_link(cls, article_tag, today: str) -> Optional[Dict]:
        """Build a news item from an article link, or ``None`` if it is skipped.

        Links with a title shorter than ``_MIN_TITLE_LENGTH`` characters and
        articles whose date stamp does not contain *today* are skipped. A
        link without any date stamp is assumed to be from today.
        """
        title = article_tag.get_text(strip=True)
        if not title or len(title) < cls._MIN_TITLE_LENGTH:
            return None
        date_text = cls._find_date(article_tag) or today
        if today not in date_text:
            return None
        return {
            'title': title,
            'url': cls._absolute_url(article_tag.get('href', '')),
            'date': date_text,
            'source': 'AI Times',
            'category': 'AI',
        }

    @staticmethod
    def _dedupe_by_url(items: List[Dict]) -> List[Dict]:
        """Return *items* without later entries that repeat an earlier URL."""
        seen_urls = set()
        unique = []
        for item in items:
            if item['url'] not in seen_urls:
                seen_urls.add(item['url'])
                unique.append(item)
        return unique

    def fetch_aitimes_news(self) -> List[Dict]:
        """Scrape today's articles from the AI Times list pages.

        Returns:
            At most 20 news dicts (``title``, ``url``, ``date``, ``source``,
            ``category``), de-duplicated by URL. When fewer than three
            articles are found the result is padded with built-in samples.
        """
        print("📰 AI Times 뉴스 수집 중...")
        today = datetime.now().strftime('%m-%d')  # e.g. '10-10'
        total_urls = len(self._AITIMES_LIST_URLS)
        all_news = []

        for url_idx, url in enumerate(self._AITIMES_LIST_URLS, 1):
            try:
                print(f"  🔍 [{url_idx}/{total_urls}] 수집 중: {url}")
                response = requests.get(
                    url, timeout=15, headers={'User-Agent': self._USER_AGENT}
                )
                response.raise_for_status()
                response.encoding = 'utf-8'
                soup = BeautifulSoup(response.text, 'html.parser')

                articles = soup.find_all('a', href=self._ARTICLE_HREF_RE)
                print(f"    → {len(articles)}개 링크 발견")

                articles_found = 0
                for article_tag in articles:
                    try:
                        news_item = self._parse_article_link(article_tag, today)
                    except Exception as e:
                        # One malformed link must not stop the page.
                        print(f"    ⚠️ 기사 파싱 오류: {e}")
                        continue
                    if news_item is None:
                        continue
                    all_news.append(news_item)
                    articles_found += 1
                    print(f"    ✓ 추가: {news_item['title'][:60]}... ({news_item['date']})")

                print(f"    → {articles_found}개 오늘자 기사 수집\n")
                time.sleep(1)  # be gentle with the server
            except Exception as e:
                print(f"  ⚠️ URL 수집 오류: {e}\n")
                continue

        unique_news = self._dedupe_by_url(all_news)
        print(f"✅ 총 {len(unique_news)}개 중복 제거된 오늘자 뉴스\n")

        if len(unique_news) < self._MIN_NEWS_COUNT:
            print("⚠️ 뉴스가 부족하여 최근 샘플 추가\n")
            seen_urls = {news['url'] for news in unique_news}
            for sample in self._SAMPLE_NEWS:
                if sample['url'] not in seen_urls:
                    # Copy: callers attach an 'analysis' key to each item.
                    unique_news.append(dict(sample))
                    seen_urls.add(sample['url'])

        return unique_news[:self._MAX_NEWS_COUNT]

    def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]:
        """Fetch the trending Hugging Face models and analyse each one.

        Successful results are written to the database. If listing the models
        fails, previously stored models are returned instead.

        Args:
            limit: Maximum number of models to fetch.
        """
        print(f"🤗 허깅페이스 트렌딩 모델 {limit}개 수집 중...")
        models_list = []
        try:
            api = HfApi()
            models = list(api.list_models(
                sort="trending_score",
                direction=-1,
                limit=limit,
            ))
            print(f"📊 API에서 {len(models)}개 모델 받음")

            for idx, model in enumerate(models[:limit], 1):
                try:
                    model_info = {
                        'name': model.id,
                        'downloads': getattr(model, 'downloads', 0) or 0,
                        'likes': getattr(model, 'likes', 0) or 0,
                        'task': getattr(model, 'pipeline_tag', 'N/A') or 'N/A',
                        'url': f"https://huggingface.co/{model.id}",
                        'rank': idx,
                    }
                    print(f"  🔍 {idx}. {model.id} 분석 중...")
                    model_info['analysis'] = self.llm_analyzer.analyze_model(
                        model_info['name'],
                        model_info['task'],
                        model_info['downloads'],
                    )
                    models_list.append(model_info)

                    time.sleep(0.5)  # stay below API rate limits
                    if idx % 5 == 0:
                        print(f"  ✓ {idx}개 모델 처리 완료...")
                except Exception as e:
                    print(f"  ⚠️ 모델 {idx} 처리 오류: {e}")
                    continue

            print(f"✅ {len(models_list)}개 트렌딩 모델 수집 완료")
            if models_list:
                save_models_to_db(models_list)
            return models_list
        except Exception as e:
            print(f"❌ 모델 수집 오류: {e}")
            print("💾 DB에서 이전 데이터 로드 시도...")
            return load_models_from_db()

    def fetch_huggingface_spaces(self, limit: int = 30) -> List[Dict]:
        """Fetch the trending Hugging Face spaces and analyse each one.

        Successful results are written to the database. If listing the spaces
        fails, previously stored spaces are returned instead.

        Args:
            limit: Maximum number of spaces to fetch.
        """
        print(f"🚀 허깅페이스 트렌딩 스페이스 {limit}개 수집 중...")
        spaces_list = []
        try:
            api = HfApi()
            spaces = list(api.list_spaces(
                sort="trending_score",
                direction=-1,
                limit=limit,
            ))
            print(f"📊 API에서 {len(spaces)}개 스페이스 받음")

            for idx, space in enumerate(spaces[:limit], 1):
                try:
                    space_info = {
                        'space_id': space.id,
                        'name': space.id.split('/')[-1],
                        'author': space.author,
                        'title': getattr(space, 'title', space.id) or space.id,
                        'likes': getattr(space, 'likes', 0) or 0,
                        'url': f"https://huggingface.co/spaces/{space.id}",
                        'sdk': getattr(space, 'sdk', 'gradio') or 'gradio',
                        'rank': idx,
                    }
                    print(f"  🔍 {idx}. {space.id} 분석 중...")
                    space_analysis = self.llm_analyzer.analyze_space(
                        space_info['name'],
                        space_info['space_id'],
                        space_info['title'],
                    )
                    space_info['simple_explanation'] = space_analysis['simple_explanation']
                    space_info['tech_stack'] = space_analysis['tech_stack']
                    # The template renders space.description.
                    space_info['description'] = space_info['title']
                    spaces_list.append(space_info)

                    time.sleep(0.5)  # stay below API rate limits
                    if idx % 5 == 0:
                        print(f"  ✓ {idx}개 스페이스 처리 완료...")
                except Exception as e:
                    print(f"  ⚠️ 스페이스 {idx} 처리 오류: {e}")
                    continue

            print(f"✅ {len(spaces_list)}개 트렌딩 스페이스 수집 완료")
            if spaces_list:
                save_spaces_to_db(spaces_list)
            return spaces_list
        except Exception as e:
            print(f"❌ 스페이스 수집 오류: {e}")
            print("💾 DB에서 이전 데이터 로드 시도...")
            return load_spaces_from_db()

    def analyze_all_news(self) -> List[Dict]:
        """Scrape today's news, attach an ``analysis`` dict to each and store them."""
        print("📰 뉴스 LLM 분석 시작...")
        news = self.fetch_aitimes_news()
        if not news:
            print("⚠️ 수집된 뉴스가 없습니다.")
            return []

        analyzed_news = []
        for idx, article in enumerate(news, 1):
            print(f"  🧠 {idx}/{len(news)}: {article['title'][:50]}... 분석 중")
            article['analysis'] = self.llm_analyzer.analyze_news_simple(
                article['title'], ""
            )
            analyzed_news.append(article)

        print(f"✅ {len(analyzed_news)}개 뉴스 분석 완료")
        if analyzed_news:
            save_news_to_db(analyzed_news)
        return analyzed_news

    def get_all_data(self, force_refresh: bool = False) -> Dict:
        """Collect news, models and spaces together with summary statistics.

        Args:
            force_refresh: When True everything is fetched again. When False
                each kind of data is loaded from the database and only
                fetched if the database holds none.

        Returns:
            Dict with ``analyzed_news``, ``analyzed_models``,
            ``analyzed_spaces``, ``stats`` and a formatted ``timestamp``.
        """
        print("\n" + "=" * 60)
        print("🚀 AI 뉴스 & 허깅페이스 LLM 분석 시작")
        print("=" * 60 + "\n")

        if force_refresh:
            print("🔄 강제 새로고침 모드: 모든 데이터 새로 수집")
            analyzed_news = self.analyze_all_news()
            analyzed_models = self.fetch_huggingface_models(30)
            analyzed_spaces = self.fetch_huggingface_spaces(30)
        else:
            print("💾 DB 우선 로드 모드")
            analyzed_news = load_news_from_db()
            if not analyzed_news:
                print("📰 DB에 뉴스 없음 → 새로 수집")
                analyzed_news = self.analyze_all_news()
            else:
                print(f"✅ DB에서 {len(analyzed_news)}개 뉴스 로드")

            analyzed_models = load_models_from_db()
            if not analyzed_models:
                print("🤗 DB에 모델 없음 → 새로 수집")
                analyzed_models = self.fetch_huggingface_models(30)
            else:
                print(f"✅ DB에서 {len(analyzed_models)}개 모델 로드")

            analyzed_spaces = load_spaces_from_db()
            if not analyzed_spaces:
                print("🚀 DB에 스페이스 없음 → 새로 수집")
                analyzed_spaces = self.fetch_huggingface_spaces(30)
            else:
                print(f"✅ DB에서 {len(analyzed_spaces)}개 스페이스 로드")

        stats = {
            'total_news': len(analyzed_news),
            'hf_models': len(analyzed_models),
            'hf_spaces': len(analyzed_spaces),
            'llm_analyses': len(analyzed_news) + len(analyzed_models) + len(analyzed_spaces),
        }

        print(f"\n✅ 전체 분석 완료: {stats['llm_analyses']}개 항목")
        print(f"  📰 뉴스: {stats['total_news']}개")
        print(f"  🤗 모델: {stats['hf_models']}개")
        print(f"  🚀 스페이스: {stats['hf_spaces']}개")

        return {
            'analyzed_news': analyzed_news,
            'analyzed_models': analyzed_models,
            'analyzed_spaces': analyzed_spaces,
            'stats': stats,
            'timestamp': datetime.now().strftime('%Y년 %m월 %d일 %H:%M:%S'),
        }
# ============================================
# Flask routes
# ============================================
@app.route('/')
def index():
    """Render the main dashboard page.

    The query parameter ``refresh=true`` forces all data to be collected
    again instead of being loaded from the database.

    On failure a 500 page is returned that shows only the HTML-escaped
    exception message. The full traceback is written to stderr rather than
    sent to the client, so internal paths and code are not disclosed and the
    message cannot inject markup into the page.
    """
    try:
        force_refresh = request.args.get('refresh', 'false').lower() == 'true'
        analyzer = AdvancedAIAnalyzer()
        data = analyzer.get_all_data(force_refresh=force_refresh)
        return render_template_string(HTML_TEMPLATE, **data)
    except Exception as e:
        import html
        import traceback

        # Keep the details for the operator, not for the visitor.
        print(traceback.format_exc(), file=sys.stderr)
        message = html.escape(str(e))
        return f"""
<h1>⚠️ 오류 발생</h1>

<p>{message}</p>
""", 500
@app.route('/api/data')
def api_data():
    """Return all collected data as JSON.

    The query parameter ``refresh=true`` forces a new collection. Errors are
    reported as ``{"success": false, "error": ...}`` with status 500.
    """
    try:
        force_refresh = request.args.get('refresh', 'false').lower() == 'true'
        analyzer = AdvancedAIAnalyzer()
        data = analyzer.get_all_data(force_refresh=force_refresh)
        return jsonify({
            'success': True,
            'data': data,
        })
    except Exception as e:
        return jsonify({
            'success': False,
            'error': str(e),
        }), 500


@app.route('/api/refresh')
def api_refresh():
    """Collect everything again and return only the resulting statistics."""
    try:
        analyzer = AdvancedAIAnalyzer()
        data = analyzer.get_all_data(force_refresh=True)
        return jsonify({
            'success': True,
            'message': '데이터가 성공적으로 갱신되었습니다',
            'stats': data['stats'],
        })
    except Exception as e:
        return jsonify({
            'success': False,
            'error': str(e),
        }), 500


@app.route('/health')
def health():
    """Report service health, including row counts of the three tables.

    Returns status 500 with ``"status": "unhealthy"`` when the database
    cannot be queried (for example before the tables were created).
    """
    try:
        counts = {}
        conn = sqlite3.connect(DB_PATH)
        try:
            cursor = conn.cursor()
            # Table names are fixed literals, so building the SQL is safe.
            for table in ('news', 'models', 'spaces'):
                cursor.execute(f"SELECT COUNT(*) FROM {table}")
                counts[table] = cursor.fetchone()[0]
        finally:
            # Close the handle even when a COUNT query fails.
            conn.close()

        return jsonify({
            "status": "healthy",
            "service": "AI News LLM Analyzer",
            "version": "3.2.0",
            "database": {
                "connected": True,
                "news_count": counts['news'],
                "models_count": counts['models'],
                "spaces_count": counts['spaces'],
            },
            "fireworks_api": {
                "configured": bool(os.environ.get('FIREWORKS_API_KEY')),
            },
            "timestamp": datetime.now().isoformat(),
        })
    except Exception as e:
        return jsonify({
            "status": "unhealthy",
            "error": str(e),
        }), 500


# ============================================
# Script entry point
# ============================================
if __name__ == '__main__':
    try:
        port = int(os.environ.get('PORT', 7860))
    except ValueError:
        # A non-numeric PORT should give a clear message, not a traceback.
        print(f"❌ 잘못된 PORT 값: {os.environ.get('PORT')!r}")
        sys.exit(1)

    print(f"""
╔════════════════════════════════════════════════════════════╗
║                                                            ║
║   🤖 AI 뉴스 & 허깅페이스 LLM 분석 웹앱 v3.2                   ║
║                                                            ║
╚════════════════════════════════════════════════════════════╝

✨ 주요 기능:
   • 💾 SQLite DB 영구 스토리지
   • 🌐 AI Times 실시간 뉴스 크롤링 (2개 섹션)
   • 📰 뉴스 중고등학생 수준 분석
   • 🤗 허깅페이스 트렌딩 모델 TOP 30 (모델 카드 분석)
   • 🚀 허깅페이스 트렌딩 스페이스 TOP 30 (app.py 분석)
   • 🧠 Fireworks AI (Qwen3-235B) 실시간 LLM 분석
   • 🎨 탭 UI (뉴스/모델/스페이스)

🔑 API 설정:
   FIREWORKS_API_KEY: {"✅ 설정됨" if os.environ.get('FIREWORKS_API_KEY') else "❌ 미설정 (템플릿 모드)"}

🚀 서버 정보:
   📍 메인: http://localhost:{port}
   🔄 강제갱신: http://localhost:{port}/?refresh=true
   📊 API: http://localhost:{port}/api/data
   🔥 새로고침 API: http://localhost:{port}/api/refresh
   💚 Health: http://localhost:{port}/health

💾 데이터베이스: {DB_PATH}

초기화 중...
""")

    # The tables must exist before the first request is served.
    try:
        init_database()
    except Exception as e:
        print(f"❌ DB 초기화 오류: {e}")
        sys.exit(1)

    print("\n✅ 서버 준비 완료!")
    print("브라우저에서 위 URL을 열어주세요!")
    print("종료: Ctrl+C\n")

    try:
        app.run(
            host='0.0.0.0',
            port=port,
            debug=False,
            threaded=True,
        )
    except KeyboardInterrupt:
        print("\n\n👋 서버 종료!")
        sys.exit(0)
    except Exception as e:
        print(f"\n❌서버 오류: {e}")
        sys.exit(1)