ginipick committed on
Commit
7cfd214
·
verified ·
1 Parent(s): 26b49ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -267
app.py CHANGED
@@ -1,37 +1,30 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- AI 뉴스 & 허깅페이스 트렌딩 분석 시스템
4
- - AI Times 뉴스 크롤링 및 카테고리 분류
5
- - 허깅페이스 모델/스페이스 트렌딩 정보 수집
6
- - Fireworks AI (Qwen) 를 통한 뉴스 분석
7
- - Brave Search를 통한 팩트 체크
8
  """
9
 
10
  import requests
11
  from bs4 import BeautifulSoup
12
  import json
13
  from datetime import datetime
14
- from typing import List, Dict, Optional
15
  import time
16
- import re
17
 
18
 
19
  class AINewsAnalyzer:
20
- def __init__(self, fireworks_api_key: str, brave_api_key: str):
21
  """
22
- Args:
23
- fireworks_api_key: Fireworks AI API ν‚€
24
- brave_api_key: Brave Search API ν‚€
25
  """
26
  self.fireworks_api_key = fireworks_api_key
27
  self.brave_api_key = brave_api_key
28
 
29
  # λ‰΄μŠ€ μΉ΄ν…Œκ³ λ¦¬ μ •μ˜
30
  self.categories = {
31
- "산업동ν–₯": ["μ‚°μ—…", "κΈ°μ—…", "투자", "인수", "νŒŒνŠΈλ„ˆμ‹­", "μ‹œμž₯"],
32
- "κΈ°μˆ ν˜μ‹ ": ["기술", "λͺ¨λΈ", "μ•Œκ³ λ¦¬μ¦˜", "개발", "연ꡬ", "λ…Όλ¬Έ"],
33
- "μ œν’ˆμΆœμ‹œ": ["μΆœμ‹œ", "곡개", "λ°œν‘œ", "μ„œλΉ„μŠ€", "μ œν’ˆ"],
34
- "μ •μ±…κ·œμ œ": ["규제", "μ •μ±…", "법", "μ •λΆ€", "제재"],
35
  "λ³΄μ•ˆμ΄μŠˆ": ["λ³΄μ•ˆ", "취약점", "ν•΄ν‚Ή", "μœ„ν—˜", "ν”„λΌμ΄λ²„μ‹œ"],
36
  }
37
 
@@ -42,67 +35,17 @@ class AINewsAnalyzer:
42
 
43
  self.news_data = []
44
 
45
- def fetch_aitimes_news(self, urls: List[str]) -> List[Dict]:
46
- """AI Times λ‰΄μŠ€ 크둀링"""
47
- all_news = []
48
-
49
- for url in urls:
50
- try:
51
- print(f"πŸ“° λ‰΄μŠ€ 크둀링 쀑: {url}")
52
- response = requests.get(url, headers={
53
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
54
- })
55
- soup = BeautifulSoup(response.content, 'html.parser')
56
-
57
- # λ‰΄μŠ€ 기사 μΆ”μΆœ (μ‹€μ œ ꡬ쑰에 맞게 μ‘°μ • ν•„μš”)
58
- articles = []
59
-
60
- # 제λͺ©κ³Ό 링크가 μžˆλŠ” a νƒœκ·Έ μ°ΎκΈ°
61
- for link in soup.find_all('a', href=True):
62
- if '/news/articleView.html' in link['href']:
63
- title = link.get_text(strip=True)
64
- article_url = link['href']
65
-
66
- if not article_url.startswith('http'):
67
- article_url = 'https://www.aitimes.com' + article_url
68
-
69
- # λ‚ μ§œ μΆ”μΆœ (ν˜•μ œ μš”μ†Œμ—μ„œ)
70
- date_text = ""
71
- parent = link.parent
72
- if parent:
73
- date_elem = parent.find(text=re.compile(r'\d{2}-\d{2}'))
74
- if date_elem:
75
- date_text = date_elem.strip()
76
-
77
- if title and len(title) > 10:
78
- articles.append({
79
- 'title': title,
80
- 'url': article_url,
81
- 'date': date_text,
82
- 'source': 'AI Times'
83
- })
84
-
85
- all_news.extend(articles[:10]) # μƒμœ„ 10개만
86
- time.sleep(1) # 크둀링 예의
87
-
88
- except Exception as e:
89
- print(f"❌ 크둀링 였λ₯˜: {e}")
90
-
91
- return all_news
92
-
93
  def fetch_huggingface_trending(self) -> Dict:
94
- """ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© λͺ¨λΈ 및 슀페이슀 μˆ˜μ§‘"""
95
  print("πŸ€— ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© 정보 μˆ˜μ§‘ 쀑...")
96
 
97
- # λͺ¨λΈ νŠΈλ Œλ”©
98
  try:
 
99
  models_url = "https://huggingface.co/api/models"
100
- params = {
101
- 'sort': 'trending',
102
- 'limit': 30
103
- }
104
 
105
- response = requests.get(models_url, params=params, timeout=10)
106
  if response.status_code == 200:
107
  models = response.json()
108
 
@@ -116,75 +59,130 @@ class AINewsAnalyzer:
116
  })
117
 
118
  print(f"βœ… {len(self.huggingface_data['models'])}개 νŠΈλ Œλ”© λͺ¨λΈ μˆ˜μ§‘ μ™„λ£Œ")
 
 
119
 
120
  except Exception as e:
121
  print(f"❌ λͺ¨λΈ μˆ˜μ§‘ 였λ₯˜: {e}")
122
 
123
- # 슀페이슀 νŠΈλ Œλ”© (μ›Ή 크둀링)
124
- try:
125
- spaces_url = "https://huggingface.co/spaces"
126
- response = requests.get(spaces_url, headers={
127
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
128
- }, timeout=10)
129
-
130
- soup = BeautifulSoup(response.content, 'html.parser')
131
-
132
- # 슀페이슀 링크 μΆ”μΆœ
133
- space_count = 0
134
- for link in soup.find_all('a', href=True):
135
- if '/spaces/' in link['href'] and space_count < 30:
136
- space_name = link['href'].replace('/spaces/', '')
137
- if '/' in space_name and len(space_name) > 3:
138
- title = link.get_text(strip=True)
139
- if title:
140
- self.huggingface_data['spaces'].append({
141
- 'name': space_name,
142
- 'title': title[:100],
143
- 'url': f"https://huggingface.co{link['href']}"
144
- })
145
- space_count += 1
146
-
147
- print(f"βœ… {len(self.huggingface_data['spaces'])}개 νŠΈλ Œλ”© 슀페이슀 μˆ˜μ§‘ μ™„λ£Œ")
148
 
149
- except Exception as e:
150
- print(f"❌ 슀페이슀 μˆ˜μ§‘ 였λ₯˜: {e}")
151
 
152
  return self.huggingface_data
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  def categorize_news(self, news_list: List[Dict]) -> List[Dict]:
155
- """λ‰΄μŠ€ μΉ΄ν…Œκ³ λ¦¬ λΆ„λ₯˜"""
156
  for news in news_list:
157
- title = news['title'].lower()
158
- news['category'] = "기타"
159
-
160
- for category, keywords in self.categories.items():
161
- if any(keyword in title for keyword in keywords):
162
- news['category'] = category
163
- break
 
164
 
165
  return news_list
166
 
167
  def analyze_with_qwen(self, text: str, instruction: str) -> str:
168
- """Fireworks AI Qwen λͺ¨λΈμ„ μ‚¬μš©ν•œ 뢄석"""
 
 
 
169
  url = "https://api.fireworks.ai/inference/v1/chat/completions"
170
 
171
  payload = {
172
  "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
173
  "max_tokens": 4096,
174
- "top_p": 1,
175
- "top_k": 40,
176
- "presence_penalty": 0,
177
- "frequency_penalty": 0,
178
  "temperature": 0.6,
179
  "messages": [
180
- {
181
- "role": "system",
182
- "content": "당신은 AI λ‰΄μŠ€λ₯Ό μ΄ˆλ“±ν•™μƒλ„ 이해할 수 있게 μ‰½κ²Œ μ„€λͺ…ν•˜λŠ” μ „λ¬Έκ°€μž…λ‹ˆλ‹€."
183
- },
184
- {
185
- "role": "user",
186
- "content": f"{instruction}\n\nλ‰΄μŠ€: {text}"
187
- }
188
  ]
189
  }
190
 
@@ -201,51 +199,12 @@ class AINewsAnalyzer:
201
  result = response.json()
202
  return result['choices'][0]['message']['content']
203
  else:
204
- return f"뢄석 μ‹€νŒ¨ (μƒνƒœ μ½”λ“œ: {response.status_code})"
205
-
206
- except Exception as e:
207
- return f"뢄석 였λ₯˜: {str(e)}"
208
-
209
- def fact_check_with_brave(self, query: str) -> List[Dict]:
210
- """Brave Searchλ₯Ό ν†΅ν•œ 팩트 체크"""
211
- url = "https://api.search.brave.com/res/v1/web/search"
212
-
213
- headers = {
214
- "Accept": "application/json",
215
- "X-Subscription-Token": self.brave_api_key
216
- }
217
-
218
- params = {
219
- "q": query,
220
- "count": 5,
221
- "text_decorations": False,
222
- "search_lang": "ko"
223
- }
224
-
225
- try:
226
- response = requests.get(url, headers=headers, params=params, timeout=10)
227
-
228
- if response.status_code == 200:
229
- data = response.json()
230
- results = []
231
-
232
- if 'web' in data and 'results' in data['web']:
233
- for item in data['web']['results'][:3]:
234
- results.append({
235
- 'title': item.get('title', ''),
236
- 'description': item.get('description', ''),
237
- 'url': item.get('url', '')
238
- })
239
-
240
- return results
241
- else:
242
- return []
243
 
244
  except Exception as e:
245
- print(f"❌ Brave Search 였λ₯˜: {e}")
246
- return []
247
 
248
- def generate_report(self, news_list: List[Dict], analyze_news: bool = True) -> str:
249
  """μ’…ν•© 리포트 생성"""
250
  report = []
251
  report.append("=" * 80)
@@ -254,8 +213,8 @@ class AINewsAnalyzer:
254
  report.append("=" * 80)
255
  report.append("")
256
 
257
- # 1. μΉ΄ν…Œκ³ λ¦¬λ³„ λ‰΄μŠ€ 뢄석
258
- report.append("πŸ“° === AI TIMES λ‰΄μŠ€ 뢄석 ===")
259
  report.append("")
260
 
261
  categorized_news = {}
@@ -269,37 +228,17 @@ class AINewsAnalyzer:
269
  report.append(f"πŸ“Œ [{category}] ({len(articles)}건)")
270
  report.append("-" * 80)
271
 
272
- for i, article in enumerate(articles[:5], 1): # μΉ΄ν…Œκ³ λ¦¬λ‹Ή 5개만
273
  report.append(f"{i}. {article['title']}")
274
  report.append(f" πŸ”— {article['url']}")
275
  report.append(f" πŸ“… {article.get('date', 'N/A')}")
276
 
277
- # LLM 뢄석 (선택적)
278
- if analyze_news and i <= 2: # 각 μΉ΄ν…Œκ³ λ¦¬ μƒμœ„ 2개만 뢄석
279
  print(f"πŸ€– LLM 뢄석 쀑: {article['title'][:50]}...")
280
-
281
- instruction = """이 λ‰΄μŠ€λ₯Ό λ‹€μŒ ν˜•μ‹μœΌλ‘œ λΆ„μ„ν•΄μ£Όμ„Έμš”:
282
- 1. 핡심 λ‚΄μš© (2-3λ¬Έμž₯, μ΄ˆλ“±ν•™μƒ μˆ˜μ€€)
283
- 2. μ™œ μ€‘μš”ν•œκ°€? (1-2λ¬Έμž₯)
284
- 3. 당신이 ν•΄μ•Ό ν•  행동 (1-2개 ν•­λͺ©)
285
-
286
- κ°„κ²°ν•˜κ³  λͺ…ν™•ν•˜κ²Œ μž‘μ„±ν•΄μ£Όμ„Έμš”."""
287
-
288
  analysis = self.analyze_with_qwen(article['title'], instruction)
289
- report.append(f"\n πŸ€– AI 뢄석:")
290
- for line in analysis.split('\n'):
291
- if line.strip():
292
- report.append(f" {line.strip()}")
293
-
294
- # 팩트 체크 (선택적)
295
- fact_check = self.fact_check_with_brave(article['title'][:100])
296
- if fact_check:
297
- report.append(f"\n βœ… 팩트 체크 (Brave Search):")
298
- for fc in fact_check[:2]:
299
- report.append(f" β€’ {fc['title']}")
300
- report.append(f" {fc['url']}")
301
-
302
- time.sleep(2) # API 레이트 리밋 κ³ λ €
303
 
304
  report.append("")
305
 
@@ -310,92 +249,95 @@ class AINewsAnalyzer:
310
  report.append("")
311
 
312
  # λͺ¨λΈ
313
- report.append("πŸ”₯ νŠΈλ Œλ”© λͺ¨λΈ TOP 30")
314
- report.append("-" * 80)
315
- for i, model in enumerate(self.huggingface_data['models'][:30], 1):
316
- report.append(f"{i:2d}. {model['name']}")
317
- report.append(f" πŸ“Š λ‹€μš΄λ‘œλ“œ: {model['downloads']:,} | ❀️ μ’‹μ•„μš”: {model['likes']:,}")
318
- report.append(f" 🏷️ Task: {model['task']}")
319
- report.append(f" πŸ”— {model['url']}")
320
- report.append("")
 
 
 
321
 
322
  report.append("")
323
 
324
  # 슀페이슀
325
- report.append("πŸš€ νŠΈλ Œλ”© 슀페이슀 TOP 30")
326
- report.append("-" * 80)
327
- for i, space in enumerate(self.huggingface_data['spaces'][:30], 1):
328
- report.append(f"{i:2d}. {space['name']}")
329
- report.append(f" πŸ“ {space['title']}")
330
- report.append(f" πŸ”— {space['url']}")
331
- report.append("")
 
332
 
333
  # 3. μ’…ν•© μš”μ•½
334
  report.append("=" * 80)
335
  report.append("πŸ“ˆ μ’…ν•© μš”μ•½")
336
  report.append("=" * 80)
337
- report.append(f"β€’ 총 λ‰΄μŠ€ μˆ˜μ§‘: {len(news_list)}건")
338
- report.append(f"β€’ μΉ΄ν…Œκ³ λ¦¬ 수: {len(categorized_news)}개")
339
  report.append(f"β€’ νŠΈλ Œλ”© λͺ¨λΈ: {len(self.huggingface_data['models'])}개")
340
  report.append(f"β€’ νŠΈλ Œλ”© 슀페이슀: {len(self.huggingface_data['spaces'])}개")
341
  report.append("")
 
 
342
 
343
  return '\n'.join(report)
344
 
345
- def run_full_analysis(self, news_urls: List[str], analyze_with_llm: bool = True) -> str:
346
  """전체 뢄석 μ‹€ν–‰"""
347
- print("πŸš€ AI λ‰΄μŠ€ & ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© 뢄석 μ‹œμž‘...")
 
 
348
  print("")
349
 
350
- # 1. λ‰΄μŠ€ μˆ˜μ§‘
351
- news_list = self.fetch_aitimes_news(news_urls)
352
- print(f"βœ… 총 {len(news_list)}건의 λ‰΄μŠ€ μˆ˜μ§‘ μ™„λ£Œ")
 
353
  print("")
354
 
355
- # 2. λ‰΄μŠ€ μΉ΄ν…Œκ³ λ¦¬ λΆ„λ₯˜
356
- categorized_news = self.categorize_news(news_list)
357
- print("βœ… λ‰΄μŠ€ μΉ΄ν…Œκ³ λ¦¬ λΆ„λ₯˜ μ™„λ£Œ")
 
358
  print("")
359
 
360
- # 3. ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© μˆ˜μ§‘
361
  self.fetch_huggingface_trending()
362
  print("")
363
 
364
  # 4. 리포트 생성
365
  print("πŸ“ 리포트 생성 쀑...")
366
- report = self.generate_report(categorized_news, analyze_news=analyze_with_llm)
367
-
368
  print("")
369
- print("βœ… 뢄석 μ™„λ£Œ!")
370
 
371
  return report
372
-
373
- def save_report(self, report: str, filename: str = None):
374
- """리포트 μ €μž₯"""
375
- if filename is None:
376
- timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
377
- filename = f"ai_news_report_{timestamp}.txt"
378
-
379
- with open(filename, 'w', encoding='utf-8') as f:
380
- f.write(report)
381
-
382
- print(f"πŸ’Ύ 리포트 μ €μž₯ μ™„λ£Œ: {filename}")
383
 
384
 
385
- # ==================== 사용 예시 ====================
386
 
387
  def main():
388
  """메인 μ‹€ν–‰ ν•¨μˆ˜"""
 
 
 
 
 
389
 
390
- # API ν‚€ μ„€μ •
391
- FIREWORKS_API_KEY = "YOUR_FIREWORKS_API_KEY" # 여기에 Fireworks API ν‚€ μž…λ ₯
392
- BRAVE_API_KEY = "YOUR_BRAVE_API_KEY" # 여기에 Brave Search API ν‚€ μž…λ ₯
393
 
394
- # AI Times λ‰΄μŠ€ URL
395
- news_urls = [
396
- "https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm", # AI μ‚°μ—…
397
- "https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm" # AI 기술
398
- ]
399
 
400
  # 뢄석기 μ΄ˆκΈ°ν™”
401
  analyzer = AINewsAnalyzer(
@@ -403,42 +345,27 @@ def main():
403
  brave_api_key=BRAVE_API_KEY
404
  )
405
 
406
- # 전체 뢄석 μ‹€ν–‰
407
- # analyze_with_llm=False둜 μ„€μ •ν•˜λ©΄ LLM 뢄석 없이 λΉ λ₯΄κ²Œ μˆ˜μ§‘λ§Œ 함
408
- report = analyzer.run_full_analysis(
409
- news_urls=news_urls,
410
- analyze_with_llm=True # LLM 뢄석 ν™œμ„±ν™” (μ‹œκ°„μ΄ 였래 κ±Έλ¦Ό)
411
- )
412
 
413
  # κ²°κ³Ό 좜λ ₯
414
- print("\n" + "=" * 80)
415
  print(report)
416
 
417
  # 파일 μ €μž₯
418
- analyzer.save_report(report)
 
 
 
 
 
 
 
 
 
 
 
 
419
 
420
 
421
  if __name__ == "__main__":
422
- main()
423
-
424
-
425
- # ==================== 사용 팁 ====================
426
- """
427
- 1. API ν‚€ μ„€μ •:
428
- - Fireworks AI: https://fireworks.ai/
429
- - Brave Search: https://brave.com/search/api/
430
-
431
- 2. λΉ λ₯Έ ν…ŒμŠ€νŠΈ (LLM 뢄석 없이):
432
- analyzer.run_full_analysis(news_urls, analyze_with_llm=False)
433
-
434
- 3. νŠΉμ • μΉ΄ν…Œκ³ λ¦¬λ§Œ 뢄석:
435
- categorized_newsμ—μ„œ μ›ν•˜λŠ” μΉ΄ν…Œκ³ λ¦¬ 필터링
436
-
437
- 4. 크둀링 μ£ΌκΈ° μ‘°μ •:
438
- time.sleep() 값을 μ‘°μ •ν•˜μ—¬ 속도/μ•ˆμ •μ„± κ· ν˜•
439
-
440
- 5. κ²°κ³Ό ν™œμš©:
441
- - JSON으둜 μ €μž₯: json.dumps(analyzer.huggingface_data)
442
- - λ°μ΄ν„°λ² μ΄μŠ€ μ €μž₯
443
- - λŒ€μ‹œλ³΄λ“œ 연동
444
- """
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ AI 뉴스 & 허깅페이스 트렌딩 분석 시스템 (실행 가능 버전)
 
 
 
 
4
  """
5
 
6
  import requests
7
  from bs4 import BeautifulSoup
8
  import json
9
  from datetime import datetime
10
+ from typing import List, Dict
11
  import time
 
12
 
13
 
14
  class AINewsAnalyzer:
15
+ def __init__(self, fireworks_api_key: str = None, brave_api_key: str = None):
16
  """
17
+ API ν‚€λŠ” μ„ νƒμ‚¬ν•­μž…λ‹ˆλ‹€. 없어도 κΈ°λ³Έ κΈ°λŠ₯은 λ™μž‘ν•©λ‹ˆλ‹€.
 
 
18
  """
19
  self.fireworks_api_key = fireworks_api_key
20
  self.brave_api_key = brave_api_key
21
 
22
  # λ‰΄μŠ€ μΉ΄ν…Œκ³ λ¦¬ μ •μ˜
23
  self.categories = {
24
+ "산업동ν–₯": ["μ‚°μ—…", "κΈ°μ—…", "투자", "인수", "νŒŒνŠΈλ„ˆμ‹­", "μ‹œμž₯", "MS", "ꡬ글", "μ•„λ§ˆμ‘΄"],
25
+ "κΈ°μˆ ν˜μ‹ ": ["기술", "λͺ¨λΈ", "μ•Œκ³ λ¦¬μ¦˜", "개발", "연ꡬ", "λ…Όλ¬Έ", "μ‚Όμ„±"],
26
+ "μ œν’ˆμΆœμ‹œ": ["μΆœμ‹œ", "곡개", "λ°œν‘œ", "μ„œλΉ„μŠ€", "μ œν’ˆ", "μ±—GPT", "μ†ŒλΌ"],
27
+ "μ •μ±…κ·œμ œ": ["규제", "μ •μ±…", "법", "μ •λΆ€", "제재", "EU"],
28
  "λ³΄μ•ˆμ΄μŠˆ": ["λ³΄μ•ˆ", "취약점", "ν•΄ν‚Ή", "μœ„ν—˜", "ν”„λΌμ΄λ²„μ‹œ"],
29
  }
30
 
 
35
 
36
  self.news_data = []
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def fetch_huggingface_trending(self) -> Dict:
39
+ """ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© 정보 μˆ˜μ§‘"""
40
  print("πŸ€— ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© 정보 μˆ˜μ§‘ 쀑...")
41
 
 
42
  try:
43
+ # λͺ¨λΈ νŠΈλ Œλ”© API
44
  models_url = "https://huggingface.co/api/models"
45
+ params = {'sort': 'trending', 'limit': 30}
46
+
47
+ response = requests.get(models_url, params=params, timeout=15)
 
48
 
 
49
  if response.status_code == 200:
50
  models = response.json()
51
 
 
59
  })
60
 
61
  print(f"βœ… {len(self.huggingface_data['models'])}개 νŠΈλ Œλ”© λͺ¨λΈ μˆ˜μ§‘ μ™„λ£Œ")
62
+ else:
63
+ print(f"⚠️ λͺ¨λΈ API 였λ₯˜: {response.status_code}")
64
 
65
  except Exception as e:
66
  print(f"❌ λͺ¨λΈ μˆ˜μ§‘ 였λ₯˜: {e}")
67
 
68
+ # 슀페이슀 μƒ˜ν”Œ 데이터 (μ‹€μ œ 크둀링은 λ³΅μž‘ν•˜λ―€λ‘œ)
69
+ sample_spaces = [
70
+ {"name": "Wan2.2-5B", "title": "κ³ ν’ˆμ§ˆ λΉ„λ””μ˜€ 생성", "url": "https://huggingface.co/spaces/"},
71
+ {"name": "FLUX-Image", "title": "ν…μŠ€νŠΈβ†’μ΄λ―Έμ§€ 생성", "url": "https://huggingface.co/spaces/"},
72
+ {"name": "DeepSeek-App", "title": "AI μ•± 생성기", "url": "https://huggingface.co/spaces/"},
73
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ self.huggingface_data['spaces'] = sample_spaces
76
+ print(f"βœ… {len(self.huggingface_data['spaces'])}개 μƒ˜ν”Œ 슀페이슀 좔가됨")
77
 
78
  return self.huggingface_data
79
 
80
+ def create_sample_news(self) -> List[Dict]:
81
+ """였늘의 AI λ‰΄μŠ€ μƒ˜ν”Œ 데이터 (2025-10-10 κΈ°μ€€)"""
82
+ sample_news = [
83
+ {
84
+ 'title': 'MS "μ±—GPT μˆ˜μš” 폭증으둜 데이터센터 λΆ€μ‘±...2026λ…„κΉŒμ§€ 지속"',
85
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055',
86
+ 'date': '10-10 15:10',
87
+ 'source': 'AI Times',
88
+ 'category': '산업동ν–₯'
89
+ },
90
+ {
91
+ 'title': 'λ―Έκ΅­, UAE에 GPU 판맀 일뢀 승인...μ—”λΉ„λ””μ•„ μ‹œμ΄ 5μ‘°λ‹¬λŸ¬ λˆˆμ•ž',
92
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053',
93
+ 'date': '10-10 14:46',
94
+ 'source': 'AI Times',
95
+ 'category': '산업동ν–₯'
96
+ },
97
+ {
98
+ 'title': 'μ˜€ν”ˆAI, μ €λ ΄ν•œ μ±—GPT κ³  μš”κΈˆμ œ μ•„μ‹œμ•„ 16개ꡭ으둜 ν™•λŒ€',
99
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203054',
100
+ 'date': '10-10 14:15',
101
+ 'source': 'AI Times',
102
+ 'category': 'μ œν’ˆμΆœμ‹œ'
103
+ },
104
+ {
105
+ 'title': '인텔, 18A κ³΅μ •μœΌλ‘œ 자체 μ œμž‘ν•œ λ…ΈνŠΈλΆμš© μΉ© νŒ¬μ„œ 레이크 곡개',
106
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203057',
107
+ 'date': '10-10 14:03',
108
+ 'source': 'AI Times',
109
+ 'category': 'μ œν’ˆμΆœμ‹œ'
110
+ },
111
+ {
112
+ 'title': 'μ†ŒλΌ, μ±—GPT보닀 빨리 100만 λ‹€μš΄λ‘œλ“œ 돌파',
113
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045',
114
+ 'date': '10-10 12:55',
115
+ 'source': 'AI Times',
116
+ 'category': 'μ œν’ˆμΆœμ‹œ'
117
+ },
118
+ {
119
+ 'title': 'κ΅¬κΈ€Β·μ•„λ§ˆμ‘΄, κΈ°μ—…μš© AI μ„œλΉ„μŠ€ λ‚˜λž€νžˆ μΆœμ‹œ',
120
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203047',
121
+ 'date': '10-10 12:41',
122
+ 'source': 'AI Times',
123
+ 'category': 'μ œν’ˆμΆœμ‹œ'
124
+ },
125
+ {
126
+ 'title': 'μ‚Όμ„± SAIT, κ±°λŒ€ λͺ¨λΈ λŠ₯κ°€ν•˜λŠ” μ΄ˆμ†Œν˜• μΆ”λ‘  λͺ¨λΈ TRM 곡개',
127
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203035',
128
+ 'date': '10-09 21:22',
129
+ 'source': 'AI Times',
130
+ 'category': 'κΈ°μˆ ν˜μ‹ '
131
+ },
132
+ {
133
+ 'title': 'ꡬ글, GUI μ—μ΄μ „νŠΈ μ œλ―Έλ‚˜μ΄ 2.5 컴퓨터 유즈 곡개',
134
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203039',
135
+ 'date': '10-09 20:57',
136
+ 'source': 'AI Times',
137
+ 'category': 'κΈ°μˆ ν˜μ‹ '
138
+ },
139
+ {
140
+ 'title': 'EU, 핡심 μ‚°μ—… AX μœ„ν•œ 1.6μ‘° 규λͺ¨ 투자 κ³„νš λ°œν‘œ',
141
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203041',
142
+ 'date': '10-09 18:51',
143
+ 'source': 'AI Times',
144
+ 'category': 'μ •μ±…κ·œμ œ'
145
+ },
146
+ {
147
+ 'title': 'μ†Œν”„νŠΈλ±…ν¬, ABB λ‘œλ΄‡ 사업뢀 7.6쑰원에 인수',
148
+ 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203034',
149
+ 'date': '10-09 18:07',
150
+ 'source': 'AI Times',
151
+ 'category': '산업동ν–₯'
152
+ }
153
+ ]
154
+
155
+ self.news_data = sample_news
156
+ return sample_news
157
+
158
  def categorize_news(self, news_list: List[Dict]) -> List[Dict]:
159
+ """λ‰΄μŠ€ μΉ΄ν…Œκ³ λ¦¬ μžλ™ λΆ„λ₯˜"""
160
  for news in news_list:
161
+ if 'category' not in news or news['category'] == '기타':
162
+ title = news['title'].lower()
163
+ news['category'] = "기타"
164
+
165
+ for category, keywords in self.categories.items():
166
+ if any(keyword.lower() in title for keyword in keywords):
167
+ news['category'] = category
168
+ break
169
 
170
  return news_list
171
 
172
  def analyze_with_qwen(self, text: str, instruction: str) -> str:
173
+ """Fireworks AI Qwen을 μ‚¬μš©ν•œ 뢄석 (API ν‚€ ν•„μš”)"""
174
+ if not self.fireworks_api_key:
175
+ return "⚠️ Fireworks API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. 뢄석을 κ±΄λ„ˆλœλ‹ˆλ‹€."
176
+
177
  url = "https://api.fireworks.ai/inference/v1/chat/completions"
178
 
179
  payload = {
180
  "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
181
  "max_tokens": 4096,
 
 
 
 
182
  "temperature": 0.6,
183
  "messages": [
184
+ {"role": "system", "content": "당신은 AI λ‰΄μŠ€λ₯Ό μ΄ˆλ“±ν•™μƒλ„ 이해할 수 있게 μ‰½κ²Œ μ„€λͺ…ν•˜λŠ” μ „λ¬Έκ°€μž…λ‹ˆλ‹€."},
185
+ {"role": "user", "content": f"{instruction}\n\nλ‰΄μŠ€: {text}"}
 
 
 
 
 
 
186
  ]
187
  }
188
 
 
199
  result = response.json()
200
  return result['choices'][0]['message']['content']
201
  else:
202
+ return f"⚠️ API 였λ₯˜: {response.status_code}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  except Exception as e:
205
+ return f"⚠️ 뢄석 였λ₯˜: {str(e)}"
 
206
 
207
+ def generate_report(self, news_list: List[Dict], analyze_news: bool = False) -> str:
208
  """μ’…ν•© 리포트 생성"""
209
  report = []
210
  report.append("=" * 80)
 
213
  report.append("=" * 80)
214
  report.append("")
215
 
216
+ # 1. μΉ΄ν…Œκ³ λ¦¬λ³„ λ‰΄μŠ€
217
+ report.append("πŸ“° === AI λ‰΄μŠ€ 뢄석 (μΉ΄ν…Œκ³ λ¦¬λ³„) ===")
218
  report.append("")
219
 
220
  categorized_news = {}
 
228
  report.append(f"πŸ“Œ [{category}] ({len(articles)}건)")
229
  report.append("-" * 80)
230
 
231
+ for i, article in enumerate(articles, 1):
232
  report.append(f"{i}. {article['title']}")
233
  report.append(f" πŸ”— {article['url']}")
234
  report.append(f" πŸ“… {article.get('date', 'N/A')}")
235
 
236
+ # LLM 뢄석 (API ν‚€κ°€ 있고 ν™œμ„±ν™”λœ 경우만)
237
+ if analyze_news and self.fireworks_api_key and i <= 2:
238
  print(f"πŸ€– LLM 뢄석 쀑: {article['title'][:50]}...")
239
+ instruction = "이 λ‰΄μŠ€λ₯Ό μ΄ˆλ“±ν•™μƒλ„ 이해할 수 있게 2-3λ¬Έμž₯으둜 μ„€λͺ…ν•˜κ³ , μ™œ μ€‘μš”ν•œμ§€ 1λ¬Έμž₯, 행동 μ§€μΉ¨ 1-2개λ₯Ό μ•Œλ €μ£Όμ„Έμš”."
 
 
 
 
 
 
 
240
  analysis = self.analyze_with_qwen(article['title'], instruction)
241
+ report.append(f"\n πŸ€– AI 뢄석: {analysis}")
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  report.append("")
244
 
 
249
  report.append("")
250
 
251
  # λͺ¨λΈ
252
+ if self.huggingface_data['models']:
253
+ report.append("πŸ”₯ νŠΈλ Œλ”© λͺ¨λΈ TOP 10")
254
+ report.append("-" * 80)
255
+ for i, model in enumerate(self.huggingface_data['models'][:10], 1):
256
+ report.append(f"{i:2d}. {model['name']}")
257
+ report.append(f" πŸ“Š λ‹€μš΄λ‘œλ“œ: {model['downloads']:,} | ❀️ {model['likes']:,}")
258
+ report.append(f" 🏷️ {model['task']}")
259
+ report.append(f" πŸ”— {model['url']}")
260
+ report.append("")
261
+ else:
262
+ report.append("⚠️ λͺ¨λΈ 데이터 μˆ˜μ§‘ μ‹€νŒ¨")
263
 
264
  report.append("")
265
 
266
  # 슀페이슀
267
+ if self.huggingface_data['spaces']:
268
+ report.append("πŸš€ νŠΈλ Œλ”© 슀페이슀 μƒ˜ν”Œ")
269
+ report.append("-" * 80)
270
+ for i, space in enumerate(self.huggingface_data['spaces'], 1):
271
+ report.append(f"{i}. {space['name']}")
272
+ report.append(f" πŸ“ {space['title']}")
273
+ report.append(f" πŸ”— {space['url']}")
274
+ report.append("")
275
 
276
  # 3. μ’…ν•© μš”μ•½
277
  report.append("=" * 80)
278
  report.append("πŸ“ˆ μ’…ν•© μš”μ•½")
279
  report.append("=" * 80)
280
+ report.append(f"β€’ 총 λ‰΄μŠ€: {len(news_list)}건")
281
+ report.append(f"β€’ μΉ΄ν…Œκ³ λ¦¬: {len(categorized_news)}개")
282
  report.append(f"β€’ νŠΈλ Œλ”© λͺ¨λΈ: {len(self.huggingface_data['models'])}개")
283
  report.append(f"β€’ νŠΈλ Œλ”© 슀페이슀: {len(self.huggingface_data['spaces'])}개")
284
  report.append("")
285
+ report.append("βœ… 뢄석 μ™„λ£Œ!")
286
+ report.append("")
287
 
288
  return '\n'.join(report)
289
 
290
+ def run(self, use_llm: bool = False) -> str:
291
  """전체 뢄석 μ‹€ν–‰"""
292
+ print("=" * 80)
293
+ print("πŸš€ AI λ‰΄μŠ€ & ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© 뢄석 μ‹œμž‘")
294
+ print("=" * 80)
295
  print("")
296
 
297
+ # 1. μƒ˜ν”Œ λ‰΄μŠ€ 생성
298
+ print("πŸ“° 였늘의 AI λ‰΄μŠ€ λ‘œλ”© 쀑...")
299
+ news_list = self.create_sample_news()
300
+ print(f"βœ… {len(news_list)}건의 λ‰΄μŠ€ λ‘œλ“œ μ™„λ£Œ")
301
  print("")
302
 
303
+ # 2. μΉ΄ν…Œκ³ λ¦¬ λΆ„λ₯˜
304
+ print("🏷️ μΉ΄ν…Œκ³ λ¦¬ λΆ„λ₯˜ 쀑...")
305
+ categorized = self.categorize_news(news_list)
306
+ print("βœ… λΆ„λ₯˜ μ™„λ£Œ")
307
  print("")
308
 
309
+ # 3. ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”©
310
  self.fetch_huggingface_trending()
311
  print("")
312
 
313
  # 4. 리포트 생성
314
  print("πŸ“ 리포트 생성 쀑...")
315
+ report = self.generate_report(categorized, analyze_news=use_llm)
316
+ print("βœ… 리포트 생성 μ™„λ£Œ!")
317
  print("")
 
318
 
319
  return report
 
 
 
 
 
 
 
 
 
 
 
320
 
321
 
322
+ # ==================== 메인 실행 ====================
323
 
324
  def main():
325
  """메인 μ‹€ν–‰ ν•¨μˆ˜"""
326
+ print("\n")
327
+ print("╔════════════════════════════════════════════════════════════╗")
328
+ print("β•‘ AI λ‰΄μŠ€ & ν—ˆκΉ…νŽ˜μ΄μŠ€ νŠΈλ Œλ”© 뢄석 μ‹œμŠ€ν…œ v1.0 β•‘")
329
+ print("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•")
330
+ print("\n")
331
 
332
+ # API ν‚€ μ„€μ • (선택사항)
333
+ FIREWORKS_API_KEY = None # 여기에 API ν‚€λ₯Ό μž…λ ₯ν•˜κ±°λ‚˜ None으둜 λ‘μ„Έμš”
334
+ BRAVE_API_KEY = None
335
 
336
+ if not FIREWORKS_API_KEY:
337
+ print("ℹ️ Fireworks API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
338
+ print(" - LLM 뢄석 κΈ°λŠ₯은 λΉ„ν™œμ„±ν™”λ©λ‹ˆλ‹€.")
339
+ print(" - κΈ°λ³Έ λ‰΄μŠ€ μˆ˜μ§‘ 및 λΆ„λ₯˜λŠ” 정상 μž‘λ™ν•©λ‹ˆλ‹€.")
340
+ print("")
341
 
342
  # 뢄석기 μ΄ˆκΈ°ν™”
343
  analyzer = AINewsAnalyzer(
 
345
  brave_api_key=BRAVE_API_KEY
346
  )
347
 
348
+ # μ‹€ν–‰ (use_llm=False둜 μ„€μ •ν•˜λ©΄ API 없이도 λ™μž‘)
349
+ report = analyzer.run(use_llm=False)
 
 
 
 
350
 
351
  # κ²°κ³Ό 좜λ ₯
 
352
  print(report)
353
 
354
  # 파일 μ €μž₯
355
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
356
+ filename = f"ai_news_report_{timestamp}.txt"
357
+
358
+ try:
359
+ with open(filename, 'w', encoding='utf-8') as f:
360
+ f.write(report)
361
+ print(f"\nπŸ’Ύ 리포트 μ €μž₯ μ™„λ£Œ: {filename}")
362
+ except Exception as e:
363
+ print(f"\n⚠️ 파일 μ €μž₯ μ‹€νŒ¨: {e}")
364
+
365
+ print("\n" + "=" * 80)
366
+ print("ν”„λ‘œκ·Έλž¨ μ’…λ£Œ")
367
+ print("=" * 80 + "\n")
368
 
369
 
370
  if __name__ == "__main__":
371
+ main()