# Political bias mitigation · fact-based neutral rewriting (BERT classifier version)
# - Classification: bucketresearch/politicalBiasBERT (left/center/right)
# - Rewriting: fact-focused summary/rewrite via OpenAI
# - Optional: Naver news search
import os
from typing import List, Dict, Tuple
import streamlit as st
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
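# Suggested runtime dependencies (an assumption, not pinned by this file): streamlit, torch,
# transformers, requests, and openai>=1.0 for the optional rewrite step.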
# =========================
# Config
# =========================
APP_TITLE = "Political Bias Analysis (BERT) · Fact-Based Neutral Rewriting"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # optional
NAVER_ID = os.getenv("NAVER_ID")  # optional
NAVER_SECRET = os.getenv("NAVER_SECRET")  # optional
MODEL_ID = "bucketresearch/politicalBiasBERT"
LABELS = ["left", "center", "right"]  # order as defined on the model card
st.set_page_config(page_title=APP_TITLE, page_icon="🧭", layout="wide")
st.title(APP_TITLE)
st.caption("PoliticalBiasBERT classifies bias (left/center/right); the UI surfaces only the fact-based neutral rewrite")
# =========================
# Model (cached)
# =========================
@st.cache_resource(show_spinner=True)
def load_bias_pipeline():
    # Load tokenizer + classifier once and cache them across Streamlit reruns
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    mdl = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    device = 0 if torch.cuda.is_available() else -1  # GPU if available, else CPU
    clf = pipeline(
        "text-classification",
        model=mdl,
        tokenizer=tok,
        return_all_scores=True,  # return scores for all three labels
        device=device
    )
    return clf
def classify_bias(clf, text: str) -> Tuple[str, List[float]]:
    # Truncate to the model's input limit so long articles do not raise errors
    scores = clf(text, truncation=True)[0]  # list of dicts: [{"label": "LABEL_0", "score": ...}, ...]
    # The model exposes LABEL_0/1/2, so scores are assumed to arrive in index order,
    # which the model card maps to [left, center, right]
    probs = [s["score"] for s in scores]
    pred_idx = int(torch.tensor(probs).argmax().item())
    return LABELS[pred_idx], probs
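# Optional diagnostic (a sketch, not wired into the app): the label order can be checked
# against the pipeline's config instead of being assumed. For this checkpoint the ids are
# generic (LABEL_0/1/2), so the left/center/right reading still follows the model card.
def debug_label_order(clf) -> Dict[int, str]:
    # e.g. {0: "LABEL_0", 1: "LABEL_1", 2: "LABEL_2"}
    return dict(clf.model.config.id2label)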
# =========================
# Naver News (optional)
# =========================
def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str, str]]:
    # Degrade gracefully: return an empty list when keys are missing or the call fails
    if not (NAVER_ID and NAVER_SECRET):
        return []
    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {"X-Naver-Client-Id": NAVER_ID, "X-Naver-Client-Secret": NAVER_SECRET}
    params = {"query": query, "display": min(display, 30), "start": 1, "sort": "date"}
    try:
        r = requests.get(url, headers=headers, params=params, timeout=15)
        if r.status_code != 200:
            return []
        items = r.json().get("items", [])
        return [{
            "title": it.get("title", ""),
            "desc": it.get("description", ""),
            "link": it.get("link", "")
        } for it in items]
    except Exception:
        return []
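# Naver search results usually embed <b>…</b> highlight tags and HTML entities in the
# "title"/"description" fields. This optional helper is an illustrative sketch (not called
# anywhere in this app) for stripping that markup before display or classification.
def clean_naver_text(raw: str) -> str:
    import html
    import re
    # Drop tags such as <b>…</b>, then decode entities such as &quot;
    return html.unescape(re.sub(r"<[^>]+>", "", raw))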
# =========================
# OpenAI: Fact-based neutral rewrite (optional)
# =========================
def generate_fact_based(text: str) -> str:
    if not OPENAI_API_KEY:
        return "(OPENAI_API_KEY not set: rewrite skipped)"
    # openai>=1.0 client interface
    from openai import OpenAI
    client = OpenAI(api_key=OPENAI_API_KEY)
    prompt = (
        "Rewrite the following text as a neutral, fact-centered news piece with no political "
        "interpretation or opinion.\n"
        "Rules: 1) focus on who/when/where/what 2) remove evaluations and speculation "
        "3) keep numbers/dates within the source 4) 5-7 sentences in Korean\n\n"
        f"[Source]\n{text}\n\n[Neutral article]"
    )
    try:
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=420,
        )
        return resp.choices[0].message.content.strip()
    except Exception as e:
        return f"(rewrite failed: {e})"
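# Illustrative usage (assumes OPENAI_API_KEY is set; the input string is hypothetical):
#   neutral = generate_fact_based("Party A claimed ..., while Party B countered ...")
#   # -> a 5-7 sentence fact-only rewrite, or a "(rewrite ...)"-style notice string on failure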
# =========================
# Sidebar
# =========================
with st.sidebar:
    st.subheader("Model status")
    with st.spinner("Loading the BERT model… only the first run takes a while"):
        clf = load_bias_pipeline()
    st.success("PoliticalBiasBERT loaded")
    st.caption("The left/center/right classification is used only as an internal diagnostic; the UI focuses on the fact-based rewrite.")
# =========================
# Main
# =========================
st.markdown("### 1) (์„ ํƒ) ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๊ฒ€์ƒ‰")
q = st.text_input("๊ฒ€์ƒ‰์–ด", value="๋ฏธ ๋Œ€์„ ")
cnt = st.slider("ํ‘œ์‹œ ๊ฐœ์ˆ˜", 1, 20, 10)
news_items: List[Dict[str,str]] = []
if st.button("๋‰ด์Šค ๋ถˆ๋Ÿฌ์˜ค๊ธฐ"):
with st.spinner("๋„ค์ด๋ฒ„ ๋‰ด์Šค ์ˆ˜์ง‘ ์ค‘โ€ฆ"):
news_items = fetch_naver_news(q, cnt)
if not news_items:
st.info("๋„ค์ด๋ฒ„ API ํ‚ค๊ฐ€ ์—†๊ฑฐ๋‚˜ ํ˜ธ์ถœ ์‹คํŒจ. ์•„๋ž˜ ์ž์œ  ์ž…๋ ฅ์œผ๋กœ ํ…Œ์ŠคํŠธํ•˜์„ธ์š”.")
st.markdown("### 2) ํ…์ŠคํŠธ ๋ถ„์„ & ์‚ฌ์‹ค ๊ธฐ๋ฐ˜ ์ค‘๋ฆฝ ์žฌ๊ตฌ์„ฑ")
c1, c2 = st.columns(2)
with c1:
sample = f"{news_items[0]['title']} โ€” {news_items[0]['desc']}" if news_items else ""
text = st.text_area("๋ถ„์„ํ•  ํ…์ŠคํŠธ(๋‰ด์Šค ์ œ๋ชฉ+์š”์•ฝ ๋“ฑ)", value=sample, height=220)
with c2:
if st.button("๋ถ„์„ ๋ฐ ์ค‘๋ฆฝ ์žฌ๊ตฌ์„ฑ ์‹คํ–‰"):
if not text.strip():
st.warning("ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.")
else:
# ๋‚ด๋ถ€ ๋ถ„๋ฅ˜(์ง„๋‹จ์šฉ)
pred, probs = classify_bias(clf, text)
# ํ™”๋ฉด ๋…ธ์ถœ: ์‚ฌ์‹ค ๊ธฐ๋ฐ˜ ์žฌ๊ตฌ์„ฑ
st.markdown("#### โœ… ์‚ฌ์‹ค ๊ธฐ๋ฐ˜ ์ค‘๋ฆฝ ์žฌ๊ตฌ์„ฑ ๊ฒฐ๊ณผ")
article = generate_fact_based(text)
st.write(article)
# ์ง„๋‹จ/์ถœ์ฒ˜
with st.expander("์ง„๋‹จ ๋ณด๊ธฐ(๋‚ด๋ถ€ ํŽธํ–ฅ ํ™•๋ฅ )"):
st.write(f"์˜ˆ์ธก: **{pred}**")
st.write(f"ํ™•๋ฅ  [left, center, right]: {probs}")
if news_items:
with st.expander("์›๋ฌธ ๋งํฌ"):
st.write(news_items[0].get("link","(๋งํฌ ์—†์Œ)"))
st.markdown("---")
st.caption("๋ฐ๋ชจ ์šฉ๋„. ์‹ค์ œ ์„œ๋น„์Šค๋Š” ์ถœ์ฒ˜ ์ถ”์ถœยท์‚ฌ์‹ค ๊ฒ€์ฆยท์ •์ฑ… ํ•„ํ„ฐ๋ง์„ ์ถ”๊ฐ€ํ•ด์•ผ ํ•จ.")