File size: 5,959 Bytes
d3950e7
 
 
 
2c9004d
 
d3950e7
2c9004d
 
d3950e7
 
 
2c9004d
 
d3950e7
2c9004d
80a84a5
c30b3f4
89609e2
 
2c9004d
d3950e7
 
 
2c9004d
 
d3950e7
2c9004d
 
d3950e7
2c9004d
d3950e7
 
 
 
 
 
 
 
 
 
 
 
 
2c9004d
d3950e7
 
 
 
 
 
2c9004d
 
d3950e7
2c9004d
 
 
 
 
 
 
 
 
 
 
 
d3950e7
 
 
 
 
2c9004d
 
 
 
 
 
 
 
 
 
 
 
d3950e7
 
2c9004d
 
 
 
 
 
 
 
 
 
 
 
 
 
d3950e7
2c9004d
 
d3950e7
 
 
 
 
2c9004d
 
d3950e7
2c9004d
 
 
 
 
d3950e7
 
 
 
 
2c9004d
 
d3950e7
 
 
2c9004d
 
d3950e7
2c9004d
 
 
 
d3950e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c9004d
 
d3950e7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# ์ •์น˜ ํŽธํ–ฅ ๋ฐฉ์ง€ ยท ์‚ฌ์‹ค ๊ธฐ๋ฐ˜ ์ค‘๋ฆฝ ์žฌ๊ตฌ์„ฑ (BERT ๋ถ„๋ฅ˜๊ธฐ ๋ฒ„์ „)
# - ๋ถ„๋ฅ˜: bucketresearch/politicalBiasBERT (left/center/right)
# - ์žฌ๊ตฌ์„ฑ: OpenAI๋กœ ์‚ฌ์‹ค ์ค‘์‹ฌ ์š”์•ฝ/์žฌ์ž‘์„ฑ
# - ์˜ต์…˜: ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๊ฒ€์ƒ‰

import os
from typing import List, Dict, Tuple

import streamlit as st
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# =========================
# Config
# =========================
APP_TITLE = "์ •์น˜ ํŽธํ–ฅ ๋ถ„์„(BERT) ยท ์‚ฌ์‹ค ๊ธฐ๋ฐ˜ ์ค‘๋ฆฝ ์žฌ๊ตฌ์„ฑ!"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")              # ์„ ํƒ
NAVER_ID       = os.getenv("NAVER_ID")             # ์„ ํƒ
NAVER_SECRET   = os.getenv("NAVER_SECRET")

MODEL_ID = "bucketresearch/politicalBiasBERT"
LABELS = ["left", "center", "right"]  # ๋ชจ๋ธ ์นด๋“œ ์ •์˜

st.set_page_config(page_title=APP_TITLE, page_icon="๐Ÿงญ", layout="wide")
st.title(APP_TITLE)
st.caption("PoliticalBiasBERT๋กœ ํŽธํ–ฅ(์ขŒ/์ค‘/์šฐ) ๋ถ„๋ฅ˜ โ†’ ํ™”๋ฉด์—๋Š” '์‚ฌ์‹ค ๊ธฐ๋ฐ˜ ์ค‘๋ฆฝ ์žฌ๊ตฌ์„ฑ'๋งŒ ๋…ธ์ถœ")

# =========================
# Model (cached)
# =========================
@st.cache_resource(show_spinner=True)
def load_bias_pipeline():
    """Load the PoliticalBiasBERT text-classification pipeline.

    Cached with ``st.cache_resource`` so the model is downloaded and
    loaded only once per Streamlit process.

    Returns:
        A transformers ``text-classification`` pipeline that yields the
        scores for all three labels (left/center/right) on every call.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    # transformers convention: device 0 = first GPU, -1 = CPU.
    device = 0 if torch.cuda.is_available() else -1
    # top_k=None is the modern replacement for the deprecated
    # return_all_scores=True and produces the same "all label scores" output.
    clf = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        top_k=None,
        device=device,
    )
    return clf

def classify_bias(clf, text: str) -> Tuple[str, List[float]]:
    """Classify *text* as left/center/right using the bias pipeline.

    Args:
        clf: a text-classification pipeline returning all label scores,
            i.e. ``clf(text) -> [[{"label": "LABEL_0", "score": ...}, ...]]``.
        text: the text to classify.

    Returns:
        ``(predicted_label, probs)`` where ``probs`` is ordered
        ``[left, center, right]`` to match ``LABELS``.
    """
    scores = clf(text)[0]  # list of dicts: [{"label": "LABEL_0", "score": ...}, ...]

    def _label_index(entry: Dict) -> int:
        # Extract the numeric suffix from labels such as "LABEL_2".
        digits = "".join(ch for ch in str(entry["label"]) if ch.isdigit())
        return int(digits) if digits else 0

    # Don't rely on the pipeline preserving index order: sort explicitly so
    # probs always lines up with LABELS = [left, center, right].
    ordered = sorted(scores, key=_label_index)
    probs = [float(s["score"]) for s in ordered]
    # Pure-Python argmax; no need to round-trip through a torch tensor.
    pred_idx = max(range(len(probs)), key=probs.__getitem__)
    return LABELS[pred_idx], probs

# =========================
# Naver News (optional)
# =========================
def fetch_naver_news(query: str, display: int = 10) -> List[Dict[str,str]]:
    """Search Naver News for *query*.

    Best-effort helper: returns an empty list when the Naver API
    credentials are not configured, the HTTP call fails, or the
    response is not a 200 — it never raises.

    Args:
        query: search keyword.
        display: requested number of items (capped at 30).

    Returns:
        A list of ``{"title", "desc", "link"}`` dicts (possibly empty).
    """
    if not (NAVER_ID and NAVER_SECRET):
        return []

    endpoint = "https://openapi.naver.com/v1/search/news.json"
    auth_headers = {
        "X-Naver-Client-Id": NAVER_ID,
        "X-Naver-Client-Secret": NAVER_SECRET,
    }
    query_params = {
        "query": query,
        "display": min(display, 30),  # Naver caps per-page results
        "start": 1,
        "sort": "date",
    }

    try:
        resp = requests.get(endpoint, headers=auth_headers, params=query_params, timeout=15)
        if resp.status_code != 200:
            return []
        raw_items = resp.json().get("items", [])
    except Exception:
        # Network/JSON problems are swallowed by design (optional feature).
        return []

    results: List[Dict[str, str]] = []
    for entry in raw_items:
        results.append({
            "title": entry.get("title",""),
            "desc":  entry.get("description",""),
            "link":  entry.get("link",""),
        })
    return results

# =========================
# OpenAI: Fact-based neutral rewrite (optional)
# =========================
def generate_fact_based(text: str) -> str:
    """Rewrite *text* as a fact-only, politically neutral article via OpenAI.

    Args:
        text: the source text (news title + summary, etc.).

    Returns:
        The rewritten article, or a Korean placeholder/error string when
        the API key is missing or the call fails. Never raises.
    """
    if not OPENAI_API_KEY:
        return "(OPENAI_API_KEY 미설정: 재구성 생략됨)"
    prompt = (
        "다음 텍스트를 정치적 해석/의견 없이, 사실 중심의 중립 기사로 재구성하세요.\n"
        "규칙: 1) 누가·언제·어디서·무엇을 중심 2) 평가/추측 삭제 3) 수치/날짜는 원문 범위 4) 한국어 5~7문장\n\n"
        f"[원문]\n{text}\n\n[중립 기사]"
    )
    messages = [{"role": "user", "content": prompt}]
    try:
        import openai
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: module-level ChatCompletion was removed; use the
            # client object API instead.
            client = openai.OpenAI(api_key=OPENAI_API_KEY)
            resp = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.3,
                max_tokens=420,
            )
            return resp.choices[0].message.content.strip()
        # Legacy openai<1.0 fallback.
        openai.api_key = OPENAI_API_KEY
        resp = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.3,
            max_tokens=420,
        )
        return resp["choices"][0]["message"]["content"].strip()
    except Exception as e:
        return f"(재구성 실패: {e})"

# =========================
# Sidebar
# =========================
with st.sidebar:
    st.subheader("๋ชจ๋ธ ์ƒํƒœ")
    with st.spinner("BERT ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘โ€ฆ ์ฒ˜์Œ ํ•œ ๋ฒˆ๋งŒ ๊ธฐ๋‹ค๋ฆฌ๋ฉด ๋จ"):
        clf = load_bias_pipeline()
    st.success("PoliticalBiasBERT ๋กœ๋“œ ์™„๋ฃŒ")
    st.caption("์ขŒ/์ค‘/์šฐ ๋ถ„๋ฅ˜๋Š” ๋‚ด๋ถ€ ์ง„๋‹จ์šฉ์œผ๋กœ๋งŒ ์‚ฌ์šฉ. ํ™”๋ฉด์€ ์‚ฌ์‹ค ๊ธฐ๋ฐ˜ ์žฌ๊ตฌ์„ฑ ์œ„์ฃผ.")

# =========================
# Main
# =========================
st.markdown("### 1) (선택) 네이버 뉴스 검색")
q = st.text_input("검색어", value="미 대선")
cnt = st.slider("표시 개수", 1, 20, 10)

# Persist fetched articles across reruns. Streamlit re-executes the whole
# script on every widget interaction, so a plain local list would be reset
# to [] the moment the second button is clicked — losing the sample text
# and the source link. st.session_state survives reruns.
if "news_items" not in st.session_state:
    st.session_state["news_items"] = []

if st.button("뉴스 불러오기"):
    with st.spinner("네이버 뉴스 수집 중…"):
        st.session_state["news_items"] = fetch_naver_news(q, cnt)
    if not st.session_state["news_items"]:
        st.info("네이버 API 키가 없거나 호출 실패. 아래 자유 입력으로 테스트하세요.")

news_items: List[Dict[str,str]] = st.session_state["news_items"]

st.markdown("### 2) 텍스트 분석 & 사실 기반 중립 재구성")
c1, c2 = st.columns(2)
with c1:
    # Pre-fill the text area with the first fetched article, if any.
    sample = f"{news_items[0]['title']} — {news_items[0]['desc']}" if news_items else ""
    text = st.text_area("분석할 텍스트(뉴스 제목+요약 등)", value=sample, height=220)

with c2:
    if st.button("분석 및 중립 재구성 실행"):
        if not text.strip():
            st.warning("텍스트를 입력하세요.")
        else:
            # Internal bias classification — diagnostic only, shown in an
            # expander below rather than as the primary output.
            pred, probs = classify_bias(clf, text)

            # Primary user-facing output: the fact-based neutral rewrite.
            st.markdown("#### ✅ 사실 기반 중립 재구성 결과")
            article = generate_fact_based(text)
            st.write(article)

            # Diagnostics and source link.
            with st.expander("진단 보기(내부 편향 확률)"):
                st.write(f"예측: **{pred}**")
                st.write(f"확률 [left, center, right]: {probs}")
            if news_items:
                with st.expander("원문 링크"):
                    st.write(news_items[0].get("link","(링크 없음)"))

st.markdown("---")
st.caption("데모 용도. 실제 서비스는 출처 추출·사실 검증·정책 필터링을 추가해야 함.")