|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
from typing import List, Dict, Tuple |
|
|
|
|
|
import streamlit as st |
|
|
import requests |
|
|
import torch |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# UI title (Korean; shown in the browser tab and as the page header).
APP_TITLE = "์ ์น ํธํฅ ๋ถ์(BERT) ยท ์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ!"

# Credentials come from the environment; each feature degrades gracefully
# when its key is absent (see generate_fact_based / fetch_naver_news).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NAVER_ID = os.getenv("NAVER_ID")
NAVER_SECRET = os.getenv("NAVER_SECRET")

# Hugging Face model id for the political-bias classifier.
MODEL_ID = "bucketresearch/politicalBiasBERT"
# Class names in the order the app reports probabilities.
# NOTE(review): assumed to match the model's id2label ordering — confirm.
LABELS = ["left", "center", "right"]

# st.set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title=APP_TITLE, page_icon="๐งญ", layout="wide")
st.title(APP_TITLE)
st.caption("PoliticalBiasBERT๋ก ํธํฅ(์ข/์ค/์ฐ) ๋ถ๋ฅ โ ํ๋ฉด์๋ '์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ'๋ง ๋ธ์ถ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource(show_spinner=True)
def load_bias_pipeline():
    """Build the PoliticalBiasBERT text-classification pipeline.

    Cached by Streamlit (st.cache_resource) so the tokenizer and model
    weights are loaded only once per server process. Runs on GPU 0 when
    CUDA is available, otherwise on CPU.
    """
    # transformers pipeline device convention: 0 = first GPU, -1 = CPU.
    target_device = 0 if torch.cuda.is_available() else -1
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    # return_all_scores=True keeps every class score (not just the top-1),
    # which classify_bias relies on downstream.
    return pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        return_all_scores=True,
        device=target_device,
    )
|
|
|
|
|
def classify_bias(clf, text: str) -> Tuple[str, List[float]]:
    """Classify the political bias of *text* using pipeline *clf*.

    Args:
        clf: a transformers text-classification pipeline built with
            return_all_scores=True, so clf(text) returns one list of
            {"label", "score"} dicts per input.
        text: the text to classify.

    Returns:
        (predicted_label, probs): predicted_label is the entry of LABELS
        at the argmax position; probs is the raw per-class score list in
        pipeline output order.
    """
    # Single input -> clf returns a one-element batch; take the inner
    # list of per-label score dicts.
    scores = clf(text)[0]
    probs = [entry["score"] for entry in scores]
    # Pure-Python argmax: no need to round-trip a 3-element list through
    # a torch tensor just to find the max index.
    pred_idx = max(range(len(probs)), key=probs.__getitem__)
    # NOTE(review): assumes pipeline score order matches LABELS
    # ([left, center, right]) — confirm against the model's id2label.
    # Inputs longer than the model's max sequence length will raise
    # unless truncation is enabled when the pipeline is built.
    return LABELS[pred_idx], probs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str, str]]:
    """Search Naver News for *query* and return simplified result dicts.

    Best-effort: returns [] when the API credentials are missing, the HTTP
    status is not 200, or any request/parse error occurs. Each returned
    dict has "title", "desc", and "link" keys.
    """
    # Without both credentials the API cannot be called at all.
    if not (NAVER_ID and NAVER_SECRET):
        return []

    endpoint = "https://openapi.naver.com/v1/search/news.json"
    auth_headers = {
        "X-Naver-Client-Id": NAVER_ID,
        "X-Naver-Client-Secret": NAVER_SECRET,
    }
    # Cap the page size at 30 regardless of what the caller asked for.
    query_params = {
        "query": query,
        "display": min(display, 30),
        "start": 1,
        "sort": "date",
    }

    try:
        response = requests.get(
            endpoint, headers=auth_headers, params=query_params, timeout=15
        )
        if response.status_code != 200:
            return []
        raw_items = response.json().get("items", [])
        simplified = []
        for item in raw_items:
            simplified.append(
                {
                    "title": item.get("title", ""),
                    "desc": item.get("description", ""),
                    "link": item.get("link", ""),
                }
            )
        return simplified
    except Exception:
        # Deliberate best-effort: any network/JSON failure yields no results.
        return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_fact_based(text: str) -> str:
    """Rewrite *text* as a fact-only, politically neutral article via OpenAI.

    Returns the rewritten article, or a Korean placeholder string when the
    API key is missing or the API call fails (best-effort, never raises).

    Fix: the legacy ``openai.ChatCompletion`` API was removed in
    openai>=1.0, so the original call crashes on any modern install.
    Prefer the v1 client API and fall back to the legacy one.
    """
    if not OPENAI_API_KEY:
        return "(OPENAI_API_KEY ๋ฏธ์ค์ : ์ฌ๊ตฌ์ฑ ์๋ต๋จ)"
    prompt = (
        "๋ค์ ํ์คํธ๋ฅผ ์ ์น์ ํด์/์๊ฒฌ ์์ด, ์ฌ์ค ์ค์ฌ์ ์ค๋ฆฝ ๊ธฐ์ฌ๋ก ์ฌ๊ตฌ์ฑํ์ธ์.\n"
        "๊ท์น: 1) ๋๊ฐยท์ธ์ ยท์ด๋์ยท๋ฌด์์ ์ค์ฌ 2) ํ๊ฐ/์ถ์ธก ์ญ์ 3) ์์น/๋ ์ง๋ ์๋ฌธ ๋ฒ์ 4) ํ๊ตญ์ด 5~7๋ฌธ์ฅ\n\n"
        f"[์๋ฌธ]\n{text}\n\n[์ค๋ฆฝ ๊ธฐ์ฌ]"
    )
    try:
        try:
            # openai >= 1.0: client-object API (ChatCompletion was removed).
            from openai import OpenAI
        except ImportError:
            # openai < 1.0: legacy module-level API.
            import openai
            openai.api_key = OPENAI_API_KEY
            resp = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=420,
            )
            return resp["choices"][0]["message"]["content"].strip()
        client = OpenAI(api_key=OPENAI_API_KEY)
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=420,
        )
        return resp.choices[0].message.content.strip()
    except Exception as e:
        return f"(์ฌ๊ตฌ์ฑ ์คํจ: {e})"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sidebar: load the (cached) BERT pipeline and show model status.
with st.sidebar:
    st.subheader("๋ชจ๋ธ ์ํ")
    # First run downloads/loads the model; later runs hit st.cache_resource.
    with st.spinner("BERT ๋ชจ๋ธ ๋ก๋ฉ ์คโฆ ์ฒ์ ํ ๋ฒ๋ง ๊ธฐ๋ค๋ฆฌ๋ฉด ๋จ"):
        clf = load_bias_pipeline()
    st.success("PoliticalBiasBERT ๋ก๋ ์๋ฃ")
    st.caption("์ข/์ค/์ฐ ๋ถ๋ฅ๋ ๋ด๋ถ ์ง๋จ์ฉ์ผ๋ก๋ง ์ฌ์ฉ. ํ๋ฉด์ ์ฌ์ค ๊ธฐ๋ฐ ์ฌ๊ตฌ์ฑ ์์ฃผ.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Section 1: optional Naver News search to pre-fill the analysis text box.
st.markdown("### 1) (์ ํ) ๋ค์ด๋ฒ ๋ด์ค ๊ฒ์")
q = st.text_input("๊ฒ์์ด", value="๋ฏธ ๋์ ")
cnt = st.slider("ํ์ ๊ฐ์", 1, 20, 10)
news_items: List[Dict[str, str]] = []
# NOTE(review): Streamlit reruns the whole script on every interaction, so
# news_items fetched here is lost on the next rerun (e.g. when the analysis
# button is clicked) — consider st.session_state for persistence.
if st.button("๋ด์ค ๋ถ๋ฌ์ค๊ธฐ"):
    with st.spinner("๋ค์ด๋ฒ ๋ด์ค ์์ง ์คโฆ"):
        news_items = fetch_naver_news(q, cnt)
    # Empty result means missing credentials or a failed API call.
    if not news_items:
        st.info("๋ค์ด๋ฒ API ํค๊ฐ ์๊ฑฐ๋ ํธ์ถ ์คํจ. ์๋ ์์ ์๋ ฅ์ผ๋ก ํ์คํธํ์ธ์.")
|
|
|
|
|
# Section 2: classify the input text, then show only the neutral rewrite
# (bias probabilities stay tucked inside a diagnostic expander).
st.markdown("### 2) ํ์คํธ ๋ถ์ & ์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ")
c1, c2 = st.columns(2)
with c1:
    # Pre-fill with the first fetched news item's "title — description".
    sample = f"{news_items[0]['title']} โ {news_items[0]['desc']}" if news_items else ""
    text = st.text_area("๋ถ์ํ ํ์คํธ(๋ด์ค ์ ๋ชฉ+์์ฝ ๋ฑ)", value=sample, height=220)

with c2:
    if st.button("๋ถ์ ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ ์คํ"):
        if not text.strip():
            st.warning("ํ์คํธ๋ฅผ ์๋ ฅํ์ธ์.")
        else:
            # Internal diagnostics: bias label + per-class probabilities.
            pred, probs = classify_bias(clf, text)

            # Primary user-facing output: the fact-based neutral rewrite.
            st.markdown("#### โ ์ฌ์ค ๊ธฐ๋ฐ ์ค๋ฆฝ ์ฌ๊ตฌ์ฑ ๊ฒฐ๊ณผ")
            article = generate_fact_based(text)
            st.write(article)

            # Collapsed by default: raw bias prediction for inspection.
            with st.expander("์ง๋จ ๋ณด๊ธฐ(๋ด๋ถ ํธํฅ ํ๋ฅ )"):
                st.write(f"์์ธก: **{pred}**")
                st.write(f"ํ๋ฅ [left, center, right]: {probs}")
            # Link back to the source article, when one was fetched.
            if news_items:
                with st.expander("์๋ฌธ ๋งํฌ"):
                    st.write(news_items[0].get("link","(๋งํฌ ์์)"))
|
|
|
|
|
# Footer: demo-only disclaimer.
st.markdown("---")
st.caption("๋ฐ๋ชจ ์ฉ๋. ์ค์ ์๋น์ค๋ ์ถ์ฒ ์ถ์ถยท์ฌ์ค ๊ฒ์ฆยท์ ์ฑํํฐ๋ง์ ์ถ๊ฐํด์ผ ํจ.")
|
|
|