import re
import math
import base64
from io import BytesIO
from collections import defaultdict

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import numpy as np
import torch
import torch.nn as nn
from sklearn.linear_model import LinearRegression
from transformers import ElectraModel, AutoTokenizer

# Font setup (NanumGothic is needed to render Korean text in matplotlib)
font_path = './NanumGothic.ttf'
fm.fontManager.addfont(font_path)
plt.rcParams['font.family'] = fm.FontProperties(fname=font_path).get_name()
plt.rcParams['axes.unicode_minus'] = False

# Label definitions
LABELS = [ ... ]  # insert the full list of 44 KOTE labels here, without omission
NEGATIVE_EMOTIONS = [ ... ]  # insert the full list of negative-emotion labels here, without omission

# Device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model definition
class KOTEtagger(nn.Module):
    def __init__(self):
        super().__init__()
        self.electra = ElectraModel.from_pretrained("beomi/KcELECTRA-base", revision='v2021').to(device)
        self.tokenizer = AutoTokenizer.from_pretrained("beomi/KcELECTRA-base", revision='v2021')
        self.classifier = nn.Linear(self.electra.config.hidden_size, 44).to(device)

    def forward(self, text):
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=512,
            truncation=True,
            return_token_type_ids=False,
            padding="max_length",
            return_attention_mask=True,
            return_tensors='pt',
        ).to(device)
        output = self.electra(encoding["input_ids"], attention_mask=encoding["attention_mask"])
        output = output.last_hidden_state[:, 0, :]  # [CLS] representation
        output = self.classifier(output)
        return torch.sigmoid(output)

# Load trained weights
trained_model = KOTEtagger()
trained_model.load_state_dict(torch.load("kote_pytorch_lightning.bin", map_location=device), strict=False)
trained_model.eval()

# Helper functions
def parse_dialogue(text):
    """Split raw text into (speaker, utterance) pairs, one per '이름: 내용' line."""
    lines = text.strip().split('\n')
    return [
        (match.group(1).strip(), match.group(2).strip())
        for line in lines
        if (match := re.match(r"([^:]+):(.+)", line.strip()))
    ]

def adjusted_score(raw_score, k=5):
    """Map a raw sigmoid score (0-1) onto a 0-100 scale with a logistic curve centered at 0.5."""
    return 100 / (1 + math.exp(-k * (raw_score - 0.5)))

def apply_ema(scores, alpha=0.4):
    """Exponential moving average to smooth per-utterance scores."""
    if not scores:
        return []
    smoothed = [scores[0]]
    for s in scores[1:]:
        smoothed.append(alpha * s + (1 - alpha) * smoothed[-1])
    return smoothed

# Main processing function
def predict_and_plot(raw_text):
    dialogue = parse_dialogue(raw_text)
    emotion_scores = defaultdict(lambda: defaultdict(list))

    # Prediction: score every utterance and keep only the negative emotions
    with torch.no_grad():
        for speaker, sentence in dialogue:
            preds = trained_model(sentence)[0]
            for label, score in zip(LABELS, preds):
                if label in NEGATIVE_EMOTIONS:
                    adjusted = adjusted_score(score.item())
                    emotion_scores[speaker][label].append(adjusted)

    html_output = ""
    for speaker in emotion_scores:
        html_output += f"<h3>{speaker}</h3>"  # speaker heading (markup assumed; original tags were stripped)

        fig, ax = plt.subplots(figsize=(8, 4))
        plotted = False
        max_y = 0

        for label, scores in emotion_scores[speaker].items():
            smoothed = apply_ema(scores)
            if len(smoothed) < 2:
                continue  # a trend needs at least two points

            # Assumed trend step: fit a line to the EMA-smoothed scores and predict the next value
            X = np.arange(len(smoothed)).reshape(-1, 1)
            reg = LinearRegression().fit(X, np.array(smoothed))
            predicted = float(reg.predict([[len(smoothed)]])[0])

            html_output += f"- {label}: 예측 점수 {predicted:.2f}"
            if predicted >= 80:
                html_output += " ⚠️ 경고!"
            html_output += "<br>"

            ax.plot(range(1, len(smoothed) + 1), smoothed, marker='o', label=label)
            ax.scatter(len(smoothed) + 1, predicted, marker='x')
            max_y = max(max_y, max(smoothed), predicted)
            plotted = True

        if plotted:
            ax.set_title(f"{speaker}의 부정 감정 변화 및 예측")
            ax.set_xlabel("발화 순서")
            ax.set_ylabel("감정 점수")
            ax.set_ylim(0, max(100, max_y + 10))
            ax.legend()
            ax.grid(True)

            # Embed the chart as an inline base64 PNG
            buf = BytesIO()
            plt.tight_layout()
            plt.savefig(buf, format='png')
            plt.close(fig)
            img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
            html_output += f"<img src='data:image/png;base64,{img_base64}'/>"
        else:
            plt.close(fig)
            html_output += "⚠️ 시각화할 수 있는 감정이 없습니다."

    return html_output