File size: 5,959 Bytes
d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d 80a84a5 c30b3f4 89609e2 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 2c9004d d3950e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# ์ ์น ํธํฅ ๋ฐฉ์ง ยท ์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ (BERT ๋ถ๋ฅ๊ธฐ ๋ฒ์ )
# - ๋ถ๋ฅ: bucketresearch/politicalBiasBERT (left/center/right)
# - ์ฌ๊ตฌ์ฑ: OpenAI๋ก ์ฌ์ค ์ค์ฌ ์์ฝ/์ฌ์์ฑ
# - ์ต์: ๋ค์ด๋ฒ ๋ด์ค ๊ฒ์
import os
from typing import List, Dict, Tuple
import streamlit as st
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
# =========================
# Config & page setup
# =========================
# NOTE(review): the Korean string literals below appear mojibake in this view;
# they are runtime UI text and are intentionally left untouched.
APP_TITLE = "์ ์น ํธํฅ ๋ถ์(BERT) ยท ์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ!"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # optional: enables the OpenAI neutral-rewrite step
NAVER_ID = os.getenv("NAVER_ID")  # optional: enables Naver news search
NAVER_SECRET = os.getenv("NAVER_SECRET")  # optional: paired with NAVER_ID
MODEL_ID = "bucketresearch/politicalBiasBERT"  # HF hub id of the left/center/right classifier
LABELS = ["left", "center", "right"]  # index order per the model card (LABEL_0..LABEL_2)
st.set_page_config(page_title=APP_TITLE, page_icon="๐งญ", layout="wide")
st.title(APP_TITLE)
st.caption("PoliticalBiasBERT๋ก ํธํฅ(์ข/์ค/์ฐ) ๋ถ๋ฅ โ ํ๋ฉด์๋ '์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ'๋ง ๋ธ์ถ")
# =========================
# Model (cached)
# =========================
@st.cache_resource(show_spinner=True)
def load_bias_pipeline():
    """Build the PoliticalBiasBERT text-classification pipeline (once per session).

    Wrapped in ``st.cache_resource`` so the model download/load only happens on
    the first run; later reruns reuse the same pipeline object. The pipeline is
    configured with ``return_all_scores=True`` so callers receive the score of
    every label, not just the top one.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    # transformers device convention: 0 = first CUDA device, -1 = CPU.
    gpu_available = torch.cuda.is_available()
    return pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        return_all_scores=True,
        device=0 if gpu_available else -1,
    )
def classify_bias(clf, text: str, labels: List[str] = None) -> Tuple[str, List[float]]:
    """Classify the political bias of *text* with a HF text-classification pipeline.

    Args:
        clf: pipeline returning all label scores, e.g. from ``load_bias_pipeline()``;
            ``clf(text)[0]`` yields ``[{"label": "LABEL_0", "score": ...}, ...]``.
        text: input text to classify.
        labels: human-readable names for LABEL_0..LABEL_(k-1); defaults to the
            module-level ``LABELS`` (["left", "center", "right"] per the model card).

    Returns:
        ``(predicted_label, probs)`` where ``probs`` is ordered by label index.
    """
    if labels is None:
        labels = LABELS
    scores = clf(text)[0]  # list of dicts: [{"label": "LABEL_0", "score": ...}, ...]
    # The original code *assumed* entries arrive in LABEL_0..k index order.
    # Enforce it by sorting on the numeric suffix, so probs align with `labels`
    # even if the pipeline returns them in another order.
    try:
        scores = sorted(scores, key=lambda s: int(str(s["label"]).rsplit("_", 1)[-1]))
    except (KeyError, ValueError):
        pass  # labels not in "LABEL_k" form; keep the pipeline's order
    probs = [s["score"] for s in scores]
    # Plain-Python argmax; no need to round-trip three floats through a torch tensor.
    pred_idx = max(range(len(probs)), key=probs.__getitem__)
    return labels[pred_idx], probs
# =========================
# Naver News (optional)
# =========================
def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str, str]]:
    """Search Naver News for *query*.

    Returns up to ``min(display, 30)`` items as dicts with "title", "desc" and
    "link" keys. Returns an empty list when the NAVER_ID/NAVER_SECRET
    credentials are missing, the HTTP call fails, or any error occurs
    (best-effort: this function never raises).
    """
    if not (NAVER_ID and NAVER_SECRET):
        return []
    endpoint = "https://openapi.naver.com/v1/search/news.json"
    auth_headers = {
        "X-Naver-Client-Id": NAVER_ID,
        "X-Naver-Client-Secret": NAVER_SECRET,
    }
    query_params = {"query": query, "display": min(display, 30), "start": 1, "sort": "date"}
    try:
        resp = requests.get(endpoint, headers=auth_headers, params=query_params, timeout=15)
        if resp.status_code != 200:
            return []
        results: List[Dict[str, str]] = []
        for entry in resp.json().get("items", []):
            results.append(
                {
                    "title": entry.get("title", ""),
                    "desc": entry.get("description", ""),
                    "link": entry.get("link", ""),
                }
            )
        return results
    except Exception:
        return []  # deliberate best-effort: caller treats [] as "no news"
# =========================
# OpenAI: Fact-based neutral rewrite (optional)
# =========================
def generate_fact_based(text: str) -> str:
    """Rewrite *text* as a fact-centered, politically neutral article via OpenAI.

    Returns a placeholder string when OPENAI_API_KEY is unset, and an error
    string when the API call fails — this function never raises.

    NOTE(review): uses the legacy ``openai.ChatCompletion`` interface, which was
    removed in openai>=1.0 — confirm the pinned openai package version.
    NOTE(review): the Korean prompt/message literals appear mojibake in this
    view; they are runtime strings and are left untouched.
    """
    if not OPENAI_API_KEY:
        return "(OPENAI_API_KEY ๋ฏธ์ค์ : ์ฌ๊ตฌ์ฑ ์๋ต๋จ)"
    import openai  # local import: only needed when a key is configured
    openai.api_key = OPENAI_API_KEY
    # Prompt (Korean): rewrite without political interpretation/opinion; keep
    # who/when/where/what; drop evaluation/speculation; preserve numbers/dates;
    # output 5-7 Korean sentences.
    prompt = (
        "๋ค์ ํ์คํธ๋ฅผ ์ ์น์ ํด์/์๊ฒฌ ์์ด, ์ฌ์ค ์ค์ฌ์ ์ค๋ฆฝ ๊ธฐ์ฌ๋ก ์ฌ๊ตฌ์ฑํ์ธ์.\n"
        "๊ท์น: 1) ๋๊ฐยท์ธ์ ยท์ด๋์ยท๋ฌด์์ ์ค์ฌ 2) ํ๊ฐ/์ถ์ธก ์ญ์ 3) ์์น/๋ ์ง๋ ์๋ฌธ ๋ฒ์ 4) ํ๊ตญ์ด 5~7๋ฌธ์ฅ\n\n"
        f"[์๋ฌธ]\n{text}\n\n[์ค๋ฆฝ ๊ธฐ์ฌ]"
    )
    try:
        resp = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[{"role":"user","content":prompt}],
            temperature=0.3,  # low temperature: favor faithful restatement over creativity
            max_tokens=420,
        )
        return resp["choices"][0]["message"]["content"].strip()
    except Exception as e:
        # Surface the failure inline in the UI instead of crashing the app.
        return f"(์ฌ๊ตฌ์ฑ ์คํจ: {e})"
# =========================
# Sidebar: model status
# =========================
with st.sidebar:
    st.subheader("๋ชจ๋ธ ์ํ")
    # First run downloads/loads the model; later reruns hit the
    # st.cache_resource cache, so the spinner is effectively one-time.
    with st.spinner("BERT ๋ชจ๋ธ ๋ก๋ฉ ์คโฆ ์ฒ์ ํ ๋ฒ๋ง ๊ธฐ๋ค๋ฆฌ๋ฉด ๋จ"):
        clf = load_bias_pipeline()
    st.success("PoliticalBiasBERT ๋ก๋ ์๋ฃ")
    st.caption("์ข/์ค/์ฐ ๋ถ๋ฅ๋ ๋ด๋ถ ์ง๋จ์ฉ์ผ๋ก๋ง ์ฌ์ฉ. ํ๋ฉด์ ์ฌ์ค ๊ธฐ๋ฐ ์ฌ๊ตฌ์ฑ ์์ฃผ.")
# =========================
# Main
# =========================
st.markdown("### 1) (์ ํ) ๋ค์ด๋ฒ ๋ด์ค ๊ฒ์")
q = st.text_input("๊ฒ์์ด", value="๋ฏธ ๋์ ")
cnt = st.slider("ํ์ ๊ฐ์", 1, 20, 10)
# BUG FIX: Streamlit reruns this whole script on every widget interaction.
# With a plain local, news_items reset to [] on the rerun right after the
# fetch, so the prefilled sample text and the source-link expander below
# disappeared as soon as the user touched any other widget. Persist the
# fetched items in session_state so they survive reruns.
if "news_items" not in st.session_state:
    st.session_state["news_items"] = []
if st.button("๋ด์ค ๋ถ๋ฌ์ค๊ธฐ"):
    with st.spinner("๋ค์ด๋ฒ ๋ด์ค ์์ง ์คโฆ"):
        st.session_state["news_items"] = fetch_naver_news(q, cnt)
    if not st.session_state["news_items"]:
        st.info("๋ค์ด๋ฒ API ํค๊ฐ ์๊ฑฐ๋ ํธ์ถ ์คํจ. ์๋ ์์ ์๋ ฅ์ผ๋ก ํ์คํธํ์ธ์.")
# Downstream code keeps reading the plain name, so the interface is unchanged.
news_items: List[Dict[str, str]] = st.session_state["news_items"]
st.markdown("### 2) ํ์คํธ ๋ถ์ & ์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ")
c1, c2 = st.columns(2)
with c1:
    # Prefill the text area with the first fetched headline + summary, if any.
    sample = f"{news_items[0]['title']} โ {news_items[0]['desc']}" if news_items else ""
    text = st.text_area("๋ถ์ํ ํ์คํธ(๋ด์ค ์ ๋ชฉ+์์ฝ ๋ฑ)", value=sample, height=220)
with c2:
    if st.button("๋ถ์ ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ ์คํ"):
        if not text.strip():
            st.warning("ํ์คํธ๋ฅผ ์๋ ฅํ์ธ์.")
        else:
            # Internal classification (diagnostics only, not shown prominently)
            pred, probs = classify_bias(clf, text)
            # User-facing output: the fact-based neutral rewrite
            st.markdown("#### โ์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ ๊ฒฐ๊ณผ")
            article = generate_fact_based(text)
            st.write(article)
            # Diagnostics / source link, tucked into expanders
            with st.expander("์ง๋จ ๋ณด๊ธฐ(๋ด๋ถ ํธํฅ ํ๋ฅ )"):
                st.write(f"์์ธก: **{pred}**")
                st.write(f"ํ๋ฅ [left, center, right]: {probs}")
            if news_items:
                with st.expander("์๋ฌธ ๋งํฌ"):
                    st.write(news_items[0].get("link","(๋งํฌ ์์)"))
st.markdown("---")
st.caption("๋ฐ๋ชจ ์ฉ๋. ์ค์ ์๋น์ค๋ ์ถ์ฒ ์ถ์ถยท์ฌ์ค ๊ฒ์ฆยท์ ์ฑํํฐ๋ง์ ์ถ๊ฐํด์ผ ํจ.")
|