820nam committed on
Commit
d3950e7
·
verified ·
1 Parent(s): ca52119

Update app.py

Files changed (1):
  1. app.py +74 -171
app.py CHANGED
@@ -1,107 +1,57 @@
- # app.py
- # Political bias mitigation demo (labels are used internally; the UI centers on "fact-based neutral rewriting")
- # - Data: HF jacobvs/PoliticalTweets
- # - Model: TF-IDF + SGDClassifier (incremental learning)
- # - External: (optional) Naver News API for article search, (optional) OpenAI for fact-based rewriting
 
  import os
- import json
- import time
- from typing import List, Dict, Tuple, Optional
 
- import requests
  import streamlit as st
-
- # ML
- import joblib
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.linear_model import SGDClassifier
- from sklearn.metrics import classification_report, accuracy_score
-
- # HF datasets
- from datasets import load_dataset
 
  # =========================
- # App Config
  # =========================
- APP_TITLE = "Political Bias Mitigation · Fact-Based Neutral Rewriting (Streamlit)"
- MODEL_PATH = "incremental_model.pkl"
- VECT_PATH = "tfidf_vectorizer.pkl"
-
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")        # optional: fact-based rewriting (env var names here are placeholders; the commit passes literal secrets to os.getenv, which always returns None)
  NAVER_ID = os.getenv("NAVER_CLIENT_ID")             # optional: Naver news search
  NAVER_SECRET = os.getenv("NAVER_CLIENT_SECRET")
 
  st.set_page_config(page_title=APP_TITLE, page_icon="🧭", layout="wide")
  st.title(APP_TITLE)
- st.caption("TF-IDF + SGDClassifier incremental learning · text-leaning classification (internal) · the UI exposes only the 'fact-based neutral rewrite'")
 
  # =========================
- # Data & Labels
  # =========================
- @st.cache_resource(show_spinner=False)
- def load_political_tweets():
-     """
-     HuggingFace: jacobvs/PoliticalTweets
-     example columns: ['index','data','id','username','text','party']
-     label mapping: Democrat=0, Republican=1, NEUTRAL=2
-     """
-     ds = load_dataset("jacobvs/PoliticalTweets")
-
-     def to_example(x):
-         t = x.get("text") or x.get("data") or ""
-         p = x.get("party") or ""
-         return {"text": t, "label": p}
-
-     train = [to_example(r) for r in ds["train"]]
-     test = [to_example(r) for r in ds["test"]]
-     return train, test
-
- def label_to_id(lbl: str) -> int:
-     lbl = (lbl or "").strip().lower()
-     if "dem" in lbl: return 0
-     if "rep" in lbl: return 1
-     return 2
-
- def id_to_label(i: int) -> str:
-     return ["Democrat", "Republican", "NEUTRAL"][i]
-
- def split_xy(rows: List[Dict[str, str]]) -> Tuple[List[str], List[int]]:
-     X = [(r["text"] or "") for r in rows]
-     y = [label_to_id(r["label"]) for r in rows]
-     return X, y
 
- # =========================
- # Model
- # =========================
- def init_or_load_model():
-     if os.path.exists(MODEL_PATH) and os.path.exists(VECT_PATH):
-         model = joblib.load(MODEL_PATH)
-         vect = joblib.load(VECT_PATH)
-     else:
-         model = SGDClassifier(loss="log_loss")
-         vect = TfidfVectorizer(max_features=10000, ngram_range=(1, 2), stop_words="english")
-     return model, vect
-
- def incremental_fit(model, vect, texts: List[str], labels: List[int]):
-     X = vect.fit_transform(texts)  # note: refits the vocabulary on every call
-     model.partial_fit(X, labels, classes=[0, 1, 2])
-     joblib.dump(model, MODEL_PATH)
-     joblib.dump(vect, VECT_PATH)
-     return model, vect
-
- def predict(model, vect, text: str) -> Tuple[int, List[float]]:
-     X = vect.transform([text])
-     probs = getattr(model, "predict_proba", None)
-     if probs is None:
-         y = int(model.predict(X)[0])
-         return y, [0.0, 0.0, 0.0]
-     pr = model.predict_proba(X)[0]
-     y = int(pr.argmax())
-     return y, pr.tolist()
103
  # =========================
- # External: Naver News (optional)
  # =========================
  def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str,str]]:
      if not (NAVER_ID and NAVER_SECRET):
@@ -114,14 +64,11 @@ def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str,str]]:
          if r.status_code != 200:
              return []
          items = r.json().get("items", [])
-         out = []
-         for it in items:
-             out.append({
-                 "title": it.get("title", ""),
-                 "desc": it.get("description", ""),
-                 "link": it.get("link", ""),
-             })
-         return out
      except Exception:
          return []
 
@@ -129,24 +76,15 @@ def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str,str]]:
  # OpenAI: Fact-based neutral rewrite (optional)
  # =========================
  def generate_fact_based(text: str) -> str:
-     """
-     Strip political interpretation/evaluation; rewrite as a fact-centered neutral article
-     """
      if not OPENAI_API_KEY:
          return "(OPENAI_API_KEY not set: rewrite skipped)"
      import openai
      openai.api_key = OPENAI_API_KEY
-
      prompt = (
-         "Rewrite the following text as a neutral, fact-centered article, free of political interpretation or opinion.\n"
-         "Rules:\n"
-         "1) State only the factual who (actor), when, where, what, how, and why\n"
-         "2) Remove evaluative, emotional, and speculative wording\n"
-         "3) Use figures, dates, and quotes only within what the source text contains\n"
-         "4) Korean output, 5-7 sentences, body only with no headline\n\n"
          f"[Source]\n{text}\n\n[Neutral article]"
      )
-
      try:
          resp = openai.ChatCompletion.create(
              model="gpt-4o-mini",
@@ -159,89 +97,54 @@ def generate_fact_based(text: str) -> str:
          return f"(rewrite failed: {e})"
 
  # =========================
- # Sidebar: Train / Evaluate
  # =========================
  with st.sidebar:
-     st.subheader("Data & Training")
-     if st.button("① Load data & train incrementally", use_container_width=True):
-         with st.spinner("Loading dataset and training…"):
-             train, test = load_political_tweets()
-             Xtr, ytr = split_xy(train)
-             model, vect = init_or_load_model()
-             model, vect = incremental_fit(model, vect, Xtr, ytr)
-         st.success("Training finished. Model/vectorizer saved.")
-
-     if st.button("② Evaluate performance", use_container_width=True):
-         try:
-             _, test = load_political_tweets()
-             Xte_texts, yte = split_xy(test)
-             model, vect = init_or_load_model()
-             Xte = vect.transform(Xte_texts)
-             ypred = model.predict(Xte)
-             acc = accuracy_score(yte, ypred)
-             st.write(f"Accuracy: **{acc:.3f}**")
-             st.code(classification_report(yte, ypred, target_names=["Democrat", "Republican", "NEUTRAL"]))
-         except Exception as e:
-             st.error(e)
-
-     st.markdown("---")
-     st.caption("Caveat: the data is U.S. political tweets (English), so it generalizes poorly to Korean articles.")
 
  # =========================
- # Main: News fetch → classify (internal) → fact-based rewrite (visible)
  # =========================
  st.markdown("### 1) (Optional) Naver news search")
  q = st.text_input("Query", value="US presidential election")
  cnt = st.slider("Result count", 1, 20, 10)
  news_items: List[Dict[str,str]] = []
-
- col_btn1, col_btn2 = st.columns([1, 1])
- with col_btn1:
-     if st.button("Fetch news"):
-         with st.spinner("Fetching Naver news…"):
-             news_items = fetch_naver_news(q, cnt)
-         if not news_items:
-             st.info("Naver API keys are missing or the call failed. Test with free-form input below.")
 
  st.markdown("### 2) Text analysis & fact-based neutral rewriting")
- left, right = st.columns([1, 1])
-
- with left:
-     sample = ""
-     if news_items:
-         # prefill the first item as a sample (user can edit)
-         sample = f"{news_items[0]['title']} — {news_items[0]['desc']}"
      text = st.text_area("Text to analyze (news title + summary, etc.)", value=sample, height=220)
 
- with right:
      if st.button("Run analysis & neutral rewrite"):
          if not text.strip():
              st.warning("Please enter some text.")
          else:
-             # 1) internal classification (not emphasized in the UI)
-             model, vect = init_or_load_model()
-             if not os.path.exists(MODEL_PATH) or not os.path.exists(VECT_PATH):
-                 st.warning("Run 'Load data & train incrementally' in the sidebar first.")
-             else:
-                 y, pr = predict(model, vect, text)
-                 pred = id_to_label(y)
-
-                 # 2) neutral article rewrite (shown in the UI)
-                 st.markdown("#### ✅ Fact-based neutral rewrite")
-                 article = generate_fact_based(text)
-                 st.write(article)
-
-                 # 3) (optional) diagnostics behind an expander
-                 with st.expander("Diagnostics (internal label/probabilities)"):
-                     st.write(f"Predicted label: **{pred}**")
-                     st.write(f"Probabilities (Dem · Rep · Neutral): {pr}")
-
-                 if news_items:
-                     with st.expander("Source link"):
-                         st.write(news_items[0].get("link", "(no link)"))
 
  st.markdown("---")
- st.caption(
-     "Demo only. A production service would need automatic source extraction, fact verification "
-     "(e.g., cross-checking quotes and figures), political-wording filters, and privacy/defamation safeguards."
- )
 
+ # Political bias mitigation · fact-based neutral rewriting (BERT classifier version)
+ # - Classification: bucketresearch/politicalBiasBERT (left/center/right)
+ # - Rewriting: fact-centered summary/rewrite via OpenAI
+ # - Optional: Naver news search
 
  import os
+ from typing import List, Dict, Tuple
 
  import streamlit as st
+ import requests
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
  # =========================
+ # Config
  # =========================
+ APP_TITLE = "Political Bias Analysis (BERT) · Fact-Based Neutral Rewriting"
 
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")        # optional: fact-based rewriting (env var names here are placeholders; the commit passes literal secrets to os.getenv, which always returns None)
  NAVER_ID = os.getenv("NAVER_CLIENT_ID")             # optional: Naver news search
  NAVER_SECRET = os.getenv("NAVER_CLIENT_SECRET")
 
+ MODEL_ID = "bucketresearch/politicalBiasBERT"
+ LABELS = ["left", "center", "right"]  # as defined on the model card
+
  st.set_page_config(page_title=APP_TITLE, page_icon="🧭", layout="wide")
  st.title(APP_TITLE)
+ st.caption("Classify leaning (left/center/right) with PoliticalBiasBERT → the UI exposes only the 'fact-based neutral rewrite'")
 
  # =========================
+ # Model (cached)
  # =========================
+ @st.cache_resource(show_spinner=True)
+ def load_bias_pipeline():
+     tok = AutoTokenizer.from_pretrained(MODEL_ID)
+     mdl = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
+     device = 0 if torch.cuda.is_available() else -1
+     clf = pipeline(
+         "text-classification",
+         model=mdl,
+         tokenizer=tok,
+         return_all_scores=True,
+         device=device
+     )
+     return clf
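One caveat on the pipeline arguments: return_all_scores=True is deprecated in recent transformers releases in favor of top_k. A sketch of the same construction on a current version (behavior unchanged; top_k=None returns scores for every class):

# Same pipeline on a recent transformers release; top_k=None replaces the
# deprecated return_all_scores=True and still returns all class scores.
clf = pipeline(
    "text-classification",
    model=mdl,
    tokenizer=tok,
    top_k=None,
    device=device,
)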
 
+ def classify_bias(clf, text: str) -> Tuple[str, List[float]]:
+     scores = clf(text)[0]  # list of dicts: [{"label": "LABEL_0", "score": ...}, ...]
+     # assumes the model uses LABEL_0/1/2 and returns them ordered by index
+     probs = [s["score"] for s in scores]  # [left, center, right] order
+     pred_idx = int(torch.tensor(probs).argmax().item())
+     return LABELS[pred_idx], probs
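classify_bias leans on the assumption that the scores arrive ordered LABEL_0, LABEL_1, LABEL_2. A slightly more defensive sketch that derives the order from the label names themselves (the model's clf.model.config.id2label could equally be consulted; the function name is illustrative, not part of the commit):

# Sketch: sort scores by the id encoded in each label name ("LABEL_0" -> 0)
# so the [left, center, right] mapping no longer depends on return order.
def classify_bias_checked(clf, text: str):
    scores = clf(text)[0]
    by_id = sorted(scores, key=lambda s: int(s["label"].rsplit("_", 1)[-1]))
    probs = [s["score"] for s in by_id]
    pred_idx = max(range(len(probs)), key=probs.__getitem__)
    return LABELS[pred_idx], probs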
 
  # =========================
+ # Naver News (optional)
  # =========================
  def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str,str]]:
      if not (NAVER_ID and NAVER_SECRET):
 
          if r.status_code != 200:
              return []
          items = r.json().get("items", [])
+         return [{
+             "title": it.get("title", ""),
+             "desc": it.get("description", ""),
+             "link": it.get("link", ""),
+         } for it in items]
      except Exception:
          return []
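The unchanged request lines inside fetch_naver_news are collapsed by the diff view. For context, a minimal sketch of the call the function presumably wraps, following the public Naver Open API conventions (endpoint and header names are not taken from this diff):

# Sketch of the elided request, per the documented Naver Open API.
r = requests.get(
    "https://openapi.naver.com/v1/search/news.json",
    params={"query": query, "display": display},
    headers={
        "X-Naver-Client-Id": NAVER_ID,          # application credentials
        "X-Naver-Client-Secret": NAVER_SECRET,
    },
    timeout=10,
)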
 
 
  # OpenAI: Fact-based neutral rewrite (optional)
  # =========================
  def generate_fact_based(text: str) -> str:
      if not OPENAI_API_KEY:
          return "(OPENAI_API_KEY not set: rewrite skipped)"
      import openai
      openai.api_key = OPENAI_API_KEY
      prompt = (
+         "Rewrite the following text as a neutral, fact-centered article, free of political interpretation/opinion.\n"
+         "Rules: 1) focus on who, when, where, and what 2) drop judgments/speculation 3) keep figures/dates within the source 4) Korean output, 5-7 sentences\n\n"
          f"[Source]\n{text}\n\n[Neutral article]"
      )
      try:
          resp = openai.ChatCompletion.create(
              model="gpt-4o-mini",
 
      return f"(rewrite failed: {e})"
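Both the old and new versions keep the legacy openai.ChatCompletion.create call, which was removed in openai>=1.0 and raises an error on current SDKs. A minimal sketch of the equivalent call against the v1 client, reusing the same prompt and model name:

# v1-style equivalent (openai>=1.0); sketch, not part of the commit.
from openai import OpenAI

client = OpenAI(api_key=OPENAI_API_KEY)
resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": prompt}],
)
article = resp.choices[0].message.content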
 
  # =========================
+ # Sidebar
  # =========================
  with st.sidebar:
+     st.subheader("Model status")
+     with st.spinner("Loading the BERT model… only the first run takes a while"):
+         clf = load_bias_pipeline()
+     st.success("PoliticalBiasBERT loaded")
+     st.caption("The left/center/right classification is internal diagnostics only; the UI centers on the fact-based rewrite.")
 
  # =========================
+ # Main
  # =========================
  st.markdown("### 1) (Optional) Naver news search")
  q = st.text_input("Query", value="US presidential election")
  cnt = st.slider("Result count", 1, 20, 10)
  news_items: List[Dict[str,str]] = []
+ if st.button("Fetch news"):
+     with st.spinner("Fetching Naver news…"):
+         news_items = fetch_naver_news(q, cnt)
+     if not news_items:
+         st.info("Naver API keys are missing or the call failed. Test with free-form input below.")
 
  st.markdown("### 2) Text analysis & fact-based neutral rewriting")
+ c1, c2 = st.columns(2)
+ with c1:
+     sample = f"{news_items[0]['title']} — {news_items[0]['desc']}" if news_items else ""
      text = st.text_area("Text to analyze (news title + summary, etc.)", value=sample, height=220)
 
+ with c2:
      if st.button("Run analysis & neutral rewrite"):
          if not text.strip():
              st.warning("Please enter some text.")
          else:
+             # internal classification (diagnostics)
+             pred, probs = classify_bias(clf, text)
+
+             # shown in the UI: the fact-based rewrite
+             st.markdown("#### ✅ Fact-based neutral rewrite")
+             article = generate_fact_based(text)
+             st.write(article)
+
+             # diagnostics / source
+             with st.expander("Diagnostics (internal bias probabilities)"):
+                 st.write(f"Prediction: **{pred}**")
+                 st.write(f"Probabilities [left, center, right]: {probs}")
+             if news_items:
+                 with st.expander("Source link"):
+                     st.write(news_items[0].get("link", "(no link)"))
 
  st.markdown("---")
+ st.caption("Demo only. A real service would add source extraction, fact verification, and policy filtering.")
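A quick way to exercise the new classification path outside Streamlit (a sketch; importing app.py as committed would also boot the UI, so this assumes the two functions above are factored into an importable module):

# Sketch: end-to-end check of the BERT path, independent of the UI.
clf = load_bias_pipeline()
label, probs = classify_bias(clf, "The Senate passed the bill on Tuesday.")
print(label, [round(p, 3) for p in probs])  # one of "left"/"center"/"right"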