project-tdm / crossref_model.py
hy
round
1ae484c
import json  # parses the JSON body returned by the search API
import os
import ssl
import sys
import urllib.error
import urllib.parse
import urllib.request  # urllib is used here instead of requests

from keybert import KeyBERT
from sentence_transformers import SentenceTransformer, util
# --- 1. Model loading ---
# Load both models in one step. If either constructor raises, neither name is
# bound by the assignment, and the handler disables both models so the
# functions that check them can bail out early.
try:
    sbert_model, kw_model = SentenceTransformer("jhgan/ko-sbert-nli"), KeyBERT()
except Exception as load_error:
    print(f"๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {load_error}")
    sbert_model = kw_model = None
# --- 2. Helper function definitions ---
def extract_keywords(text: str) -> list:
    """(TM 1) Extract keywords from the given text with KeyBERT.

    Returns an empty list when the KeyBERT model is unavailable or ``text``
    is empty. Otherwise asks KeyBERT for the top 5 single-word candidates
    (with a fixed stop-word list) and returns the first element of each
    entry it reports.
    """
    if not kw_model:
        return []
    if not text:
        return []

    # Words that must never be reported as keywords.
    ignored_words = ['๊ธฐ์ž', 'ํŠนํŒŒ์›', '์˜ค์ „', '์˜คํ›„', '์ž…๋‹ˆ๋‹ค', '์œ„ํ•ด']
    ranked = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 1),
        top_n=5,
        stop_words=ignored_words,
    )

    extracted = []
    for entry in ranked:
        extracted.append(entry[0])
    return extracted
import ssl
def search_naver_api(keywords: list, verify_ssl: bool = True, timeout: float = 10.0) -> list:
    """(API) Collect news snippets and links from the Naver search API.

    The keywords are joined with spaces into a single query and sent to the
    Naver news search endpoint with ``urllib.request``. Credentials are read
    from the ``NAVER_ID`` and ``NAVER_SECRET`` environment variables.

    Args:
        keywords: Search terms. An empty list returns ``[]`` immediately.
        verify_ssl: Verify the server's TLS certificate (default). Pass
            ``False`` only as a local workaround for a broken certificate
            store: without verification the credentials sent in the request
            headers can be intercepted.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        A list of ``{"snippet": str, "url": str}`` dicts, one per response
        item that has both ``description`` and ``link``. ``<b>`` / ``</b>``
        markers are removed from the snippet. Returns ``[]`` when there is
        nothing to search for, credentials are missing, or the request or
        response handling fails (the failure is printed, not raised).
    """
    # --- Check: make sure there is something to search for ---
    if not keywords:
        print("[DEBUG] 'keywords' ๋ฆฌ์ŠคํŠธ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค.")
        return []

    naver_id = os.environ.get("NAVER_ID")
    naver_secret = os.environ.get("NAVER_SECRET")
    if not naver_id or not naver_secret:
        # Stop here rather than sending a None header value, which would only
        # fail later with a much less specific error.
        print("[DEBUG] NAVER_ID / NAVER_SECRET environment variables are not set.")
        return []

    query = " ".join(keywords)
    url = (
        "https://openapi.naver.com/v1/search/news.json"
        f"?query={urllib.parse.quote(query)}&display=10&sort=sim"
    )
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", naver_id)
    request.add_header("X-Naver-Client-Secret", naver_secret)

    # Certificate verification is on by default; the opt-out uses the public
    # ssl API instead of the private ssl._create_unverified_context().
    context = ssl.create_default_context()
    if not verify_ssl:
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    try:
        # The context manager closes the HTTP response on every path.
        with urllib.request.urlopen(request, context=context, timeout=timeout) as response:
            rescode = response.getcode()
            print(f"[DEBUG] Naver API ์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {rescode}")
            if rescode != 200:
                print(f"[DEBUG] ๐Ÿšจ Naver API๊ฐ€ ์˜ค๋ฅ˜ ์ฝ”๋“œ๋ฅผ ๋ฐ˜ํ™˜: {rescode}")
                return []
            response_text = response.read().decode('utf-8')

        results = json.loads(response_text).get('items', [])
        return [
            {
                "snippet": item['description'].replace('<b>', '').replace('</b>', ''),
                "url": item['link'],
            }
            for item in results
            if 'description' in item and 'link' in item
        ]
    except urllib.error.HTTPError as http_err:  # HTTP error status
        print(f"[DEBUG] ๐Ÿšจ Naver API HTTP ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {http_err.code} - {http_err.reason}")
        try:
            error_body = http_err.read().decode('utf-8')
            print(f"[DEBUG] ๐Ÿšจ ์‘๋‹ต ๋‚ด์šฉ: {error_body}")
        except Exception as read_err:
            # Reading the error body is best effort; report and carry on.
            print(f"[DEBUG] Could not read the HTTP error body: {read_err}")
    except urllib.error.URLError as url_err:  # network error (including SSL)
        print(f"[DEBUG] ๐Ÿšจ Naver API URL/๋„คํŠธ์›Œํฌ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {url_err.reason}")
    except Exception as e:
        print(f"[DEBUG] ๐Ÿšจ Naver API (urllib) ํ˜ธ์ถœ ์ค‘ ์•Œ ์ˆ˜ ์—†๋Š” ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {type(e).__name__} - {e}")
    return []
def get_similarity_score(original_text: str, snippets: list):
    """(TM 2) Compute SBERT cosine similarities between the original and the snippets.

    Args:
        original_text: The article text to compare against.
        snippets: Texts to compare with ``original_text``.

    Returns:
        The value returned by ``util.cos_sim`` for the two encodings (the
        caller uses it as a tensor, reading ``[0][i]`` for snippet ``i``), or
        ``None`` when there are no snippets, the SBERT model is not loaded,
        or encoding / scoring raises.
    """
    if not snippets or not sbert_model:
        return None  # nothing to compare, or the model is unavailable

    try:
        original_embedding = sbert_model.encode(original_text)
        snippet_embeddings = sbert_model.encode(snippets)
        return util.cos_sim(original_embedding, snippet_embeddings)
    except Exception as e:
        # Report the failure instead of hiding it: the caller only sees None
        # and could not otherwise tell what went wrong.
        print(f"[DEBUG] SBERT similarity computation failed: {type(e).__name__} - {e}")
        return None
# --- 3. Final main function ---
def get_crossref_score_and_reason(article_body: str) -> dict:
    """Return the final result of the 'content unreliability' module.

    Pipeline: extract keywords from the article, search Naver news with
    them, then compare the article with every returned snippet via SBERT.

    Args:
        article_body: Full text of the article to cross-check.

    Returns:
        A dict that always contains:
            ``score``: float in 0.0-1.0, computed as ``1 - average
                similarity``; 1.0 on every failure path.
            ``reason`` / ``recommendation``: message strings.
            ``paired_results``: list of ``{"url", "similarity"}`` dicts,
                one per search result (empty on failure paths).
        The no-keywords result additionally keeps the legacy ``found_urls``
        key (always an empty list).
    """
    keywords = extract_keywords(article_body)
    if not keywords:
        return {
            "score": 1.0,
            "reason": "๋ณธ๋ฌธ์—์„œ ํ•ต์‹ฌ ํ‚ค์›Œ๋“œ๋ฅผ ์ถ”์ถœํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.",
            "recommendation": "๋ณธ๋ฌธ์ด ๋„ˆ๋ฌด ์งง๊ฑฐ๋‚˜ ๋ถ„์„ํ•  ์ˆ˜ ์—†๋Š” ๋‚ด์šฉ์ž…๋‹ˆ๋‹ค.",
            # Every other return path exposes "paired_results"; provide it
            # here too so callers can read it unconditionally. "found_urls"
            # stays for callers that relied on the old key.
            "paired_results": [],
            "found_urls": [],
        }
    print(f"[DEBUG] ์ถ”์ถœ๋œ ํ‚ค์›Œ๋“œ: {keywords}")

    search_results = search_naver_api(keywords)
    if not search_results:
        return {
            "score": 1.0,
            "reason": "๊ด€๋ จ ์ฃผ์ œ๋ฅผ ๋‹ค๋ฃฌ ๊ต์ฐจ ๊ฒ€์ฆ ๊ธฐ์‚ฌ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.",
            "recommendation": "์ฃผ์š” ํ‚ค์›Œ๋“œ๊ฐ€ ํƒ€ ์–ธ๋ก ์‚ฌ์—์„œ๋„ ๋‹ค๋ฃจ์–ด์ง€๋Š”์ง€ ํ™•์ธ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.",
            "paired_results": [],
        }

    snippets = [item['snippet'] for item in search_results]
    cosine_scores = get_similarity_score(article_body, snippets)
    if cosine_scores is None:
        return {
            "score": 1.0,
            "reason": "SBERT ์œ ์‚ฌ๋„ ๊ณ„์‚ฐ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
            "recommendation": "๋ชจ๋ธ ์„œ๋ฒ„๋ฅผ ํ™•์ธํ•˜์„ธ์š”.",
            "paired_results": [],
        }

    avg_similarity = cosine_scores.mean().item()

    # List of (URL, individual similarity) pairs. Row 0 holds the scores of
    # the article against each snippet, in search-result order.
    paired_results = [
        {"url": item['url'], "similarity": cosine_scores[0][i].item()}
        for i, item in enumerate(search_results)
    ]

    if avg_similarity < 0.3:
        reason = f"๊ด€๋ จ ๊ธฐ์‚ฌ {len(snippets)}๊ฑด๊ณผ ๋‚ด์šฉ ์ผ์น˜๋„๊ฐ€ ๋งค์šฐ ๋‚ฎ์Šต๋‹ˆ๋‹ค. (ํ‰๊ท  {avg_similarity*100:.0f}%)"
        recommendation = "๊ธฐ์‚ฌ์˜ ํ•ต์‹ฌ ์‚ฌ์‹ค๊ด€๊ณ„๊ฐ€ ํƒ€ ์–ธ๋ก ์‚ฌ์—์„œ๋„ ๋‹ค๋ฃจ์–ด์ง€๋Š”์ง€ ํ™•์ธ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค."
    else:
        reason = f"๊ต์ฐจ ๊ฒ€์ฆ๋œ ๊ธฐ์‚ฌ {len(snippets)}๊ฑด๊ณผ์˜ ํ‰๊ท  ๋‚ด์šฉ ์ผ์น˜๋„๋Š” {avg_similarity*100:.0f}%์ž…๋‹ˆ๋‹ค."
        recommendation = "์–‘ํ˜ธํ•ฉ๋‹ˆ๋‹ค."

    # Higher similarity means lower unreliability. Clamp with float bounds so
    # the score is always a float, like the 1.0 used on the failure paths.
    final_score = round(1.0 - avg_similarity, 4)
    return {
        "score": max(0.0, min(1.0, final_score)),
        "reason": reason,
        "recommendation": recommendation,
        "paired_results": paired_results,
    }
# --- 4. Test code ---
if __name__ == "__main__":
    # Manual smoke test: scores one sample article and prints the outcome.
    # It calls get_crossref_score_and_reason, which uses the loaded models and
    # the Naver search API (NAVER_ID / NAVER_SECRET environment variables).
    print("CrossrefScore ๋ชจ๋“ˆ ํ…Œ์ŠคํŠธ ์‹œ์ž‘...")
    test_body = """
๋ฐฐ๋“œ๋ฏผํ„ด ๊ตญ๊ฐ€๋Œ€ํ‘œ ์•ˆ์„ธ์˜์ด ์ธ๋„์˜คํ”ˆ ๊ฒฐ์Šน์ „์—์„œ 2-0 ์™„์Šน์„ ๊ฑฐ๋‘๊ณ  2์ฃผ ์—ฐ์† ๊ตญ์ œ ๋Œ€ํšŒ์—์„œ ์šฐ์Šน์„ ์ฐจ์ง€ํ•˜๋Š” ์พŒ๊ฑฐ๋ฅผ ์ด๋ค˜๋‹ค. ๊ทธ๋Ÿฐ๋ฐ ์•ˆ์„ธ์˜์—๊ฒŒ '์ข‹์ง€ ์•Š์€' ์†Œ์‹๋„ ํ•จ๊ป˜ ์ „ํ•ด์กŒ๋‹ค.
์•ˆ์„ธ์˜์ด 2025๋…„ ์ƒˆํ•ด ๋“ค์–ด ์น˜๋ฅธ 2์ฐจ๋ก€ ๊ตญ์ œ ๋Œ€ํšŒ๋ฅผ ๋ชจ๋‘ ์ œํŒจํ–ˆ๋‹ค.
์•ˆ์„ธ์˜์€ ์ง€๋‚œ 19์ผ ์ธ๋„ ๋‰ด๋ธ๋ฆฌ์—์„œ ์—ด๋ฆฐ ์„ธ๊ณ„๋ฐฐ๋“œ๋ฏผํ„ด์—ฐ๋งน ์›”๋“œํˆฌ์–ด ์Šˆํผ 750 ์ธ๋„์˜คํ”ˆ ์—ฌ์ž ๋‹จ์‹ ๊ฒฐ์Šน์ „์—์„œ ์„ธ๊ณ„ 12์œ„ ํƒœ๊ตญ์˜ ํฌ๋ฅธํŒŒ์œ„ ์ดˆ์ถ”์›ก์„ 2-0์œผ๋กœ ๋ฌผ๋ฆฌ์น˜๋ฉฐ ์šฐ์Šน์„ ์ฐจ์ง€ํ–ˆ๋‹ค.
์•ˆ์„ธ์˜์€ ์ด๋‚  ๊ฒฐ์Šน์ „์—์„œ ์ผ์ฐŒ๊ฐ์น˜ ์Šน๊ธฐ๋ฅผ ์žก์•˜๋‹ค.
์•ˆ์„ธ์˜์€ 1๊ฒŒ์ž„์„ 21-12๋กœ ์••๋„ํ–ˆ๋‹ค. 2๊ฒŒ์ž„์—์„œ๋„ ํŠน์œ ์˜ ์ฒ ๋ฒฝ ์ˆ˜๋น„๋กœ 15-6๊นŒ์ง€ ๊ฒฉ์ฐจ๋ฅผ ๋ฒŒ๋ ธ๋‹ค. ํŠนํžˆ 9-18๋กœ ๋’ค์ง„ ์ƒํ™ฉ์—์„œ ๋งˆ์ง€๋ง‰ ํž˜์„ ๋‹คํ•ด ํƒœ๊ตญ์˜ ์ดˆ์ถ”์›ก์˜ ๋‚ ์นด๋กœ์šด ๊ณต๊ฒฉ์„ ๋ชจ๋‘ ๋งž๋ฐ›์•„์นœ ๋์— ์ƒ๋Œ€์˜ ๋ฒ”์‹ค์„ ์œ ๋„ํ•ด ๋‚ด๋ฉฐ ์ถ”๊ฒฉ ์˜์ง€๋ฅผ ๊บพ์–ด๋ฒ„๋ ธ๋‹ค. 2๊ฒŒ์ž„ ์Šค์ฝ”์–ด๋Š” 21-9์˜€๋‹ค.
์•ˆ์„ธ์˜์€ 12์ผ ๋ง๋ ˆ์ด์‹œ์•„ ์ฟ ์•Œ๋ผ๋ฃธํ‘ธ๋ฅด์—์„œ ๋๋‚œ ์›”๋“œํˆฌ์–ด ์Šˆํผ 1000 ๋ง๋ ˆ์ด์‹œ์•„์˜คํ”ˆ์—์„œ ์˜ฌํ•ด ์ฒซ ์šฐ์Šน์„ ์ฐจ์ง€ํ•œ ๋ฐ ์ด์–ด 2์ฃผ ์—ฐ์†์œผ๋กœ ์šฐ์Šน ํŠธ๋กœํ”ผ๋ฅผ ๋”ฐ๋ƒˆ๋‹ค.
์•ˆ์„ธ์˜์€ ์ด๋ฒˆ์— ์ถœ์ „ํ•œ ์ธ๋„์˜คํ”ˆ์—์„œ 5๊ฒฝ๊ธฐ๋ฅผ ์น˜๋ฅด๋Š” ๋™์•ˆ ํ•œ ๊ฒŒ์ž„๋„ ๋‚ด์ฃผ์ง€ ์•Š๋Š” ์™„๋ฒฝํ•œ ๊ฒฝ๊ธฐ ์šด์˜์œผ๋กœ ๋ฐฐ๋“œ๋ฏผํ„ด ์—ฌ์ž๋‹จ์‹ ์„ธ๊ณ„ 1์œ„ ๋‹ค์šด '์ตœ๊ฐ• ์‹ค๋ ฅ'์„ ์ž๋ž‘ํ–ˆ๋‹ค.
ํ•ด 2์ฃผ ์—ฐ์† ๊ตญ์ œ ๋Œ€ํšŒ์—์„œ ์šฐ์Šนํ•˜๋ฉฐ ๊ธฐ์จ์„ ๋งŒ๋ฝํ•œ ์•ˆ์„ธ์˜ ์ž…์žฅ์—์„  ๋ถˆ์พŒํ•  ์ˆ˜ ์žˆ๋Š” ์†Œ์‹์ด ํ•จ๊ป˜ ์ „ํ•ด์กŒ๋‹ค. ๋Œ€ํ•œ๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ ๊น€ํƒ๊ทœ ํšŒ์žฅ์ด ์ฐจ๊ธฐ ํšŒ์žฅ ์„ ๊ฑฐ์— '๊ธฐํ˜ธ 4๋ฒˆ'์„ ๋‹ฌ๊ณ  ์ถœ๋งˆํ•  ์˜ˆ์ •์ธ ๊ฒƒ์œผ๋กœ ์ „ํ•ด์กŒ๋‹ค.
์•ˆ์„ธ์˜์€ ์ง€๋‚œํ•ด ํŒŒ๋ฆฌ์˜ฌ๋ฆผํ”ฝ์—์„œ ๊ธˆ๋ฉ”๋‹ฌ์„ ๋”ด ์ดํ›„ ๊น€ํƒ๊ทœ ํšŒ์žฅ์ด ์ด๋Œ์–ด์˜จ ๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ์™€ ๋ฐฐ๋“œ๋ฏผํ„ด๋Œ€ํ‘œํŒ€ ์šด์˜์˜ ๋ฌธ์ œ๋ฅผ ํญ๋กœํ•˜๋Š” '์ž‘์‹ฌ ๋ฐœ์–ธ'์„ ํ–ˆ๋‹ค. ๋‹น์‹œ ์•ˆ์„ธ์˜์˜ ์šฉ๊ธฐ ์žˆ๋Š” ์™ธ์นจ์€ ํ•œ๊ตญ ๋ฐฐ๋“œ๋ฏผํ„ด๊ณ„์˜ ๊ฐœํ˜์„ ์ด‰๊ตฌํ•˜๋Š” ๋ชฉ์†Œ๋ฆฌ๋กœ ์ด์–ด์กŒ๋‹ค.
20์ผ ์ดํˆฌ๋ฐ์ด ๋ณด๋„์— ๋”ฐ๋ฅด๋ฉด ๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ๋Š” ๋ชฉ์š”์ผ์ธ 23์ผ ์ฐจ๊ธฐ ๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ์žฅ ์„ ๊ฑฐ๋ฅผ ์น˜๋ฅด๊ธฐ๋กœ ํ–ˆ๋‹ค. ๊ถ์ง€์— ๋ชฐ๋ ธ๋‹ค๊ฐ€ ๊ทน์ ์œผ๋กœ ์ถœ๋งˆ ์ž๊ฒฉ์„ ํšŒ๋ณตํ•œ ๊น€ํƒ๊ทœ ํšŒ์žฅ ์—ญ์‹œ ๊ธฐํ˜ธ 4๋ฒˆ์œผ๋กœ ์ด๋ฒˆ ์„ ๊ฑฐ์— ๋‚˜์„œ๋Š” ๊ฒƒ์œผ๋กœ ์ „ํ•ด์กŒ๋‹ค.
๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ๋Š” ์ด๋‚  "(๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ) ์„ ๊ฑฐ์šด์˜์œ„์›ํšŒ๋Š” ๋ฏธ๋ค„์กŒ๋˜ ์ฐจ๊ธฐ ํšŒ์žฅ ์„ ๊ฑฐ๋ฅผ 23์ผ ์˜ค์ „ 10์‹œ๋ถ€ํ„ฐ ์˜คํ›„ 5์‹œ๊นŒ์ง€ ์ง„ํ–‰ํ•˜๊ธฐ๋กœ ํ–ˆ๋‹ค"๋ผ๊ณ  ๋ฐํ˜”๋‹ค.
๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ์žฅ ์„ ๊ฑฐ๋Š” ์• ์ดˆ 16์ผ ์—ด๋ ธ์–ด์•ผ ํ–ˆ์ง€๋งŒ ์„ ๊ฑฐ์šด์˜์œ„์›ํšŒ๊ฐ€ ์ž…ํ›„๋ณด๋ฅผ ๋ถˆํ—ˆํ•œ ๊น€ํƒ๊ทœ ํšŒ์žฅ์ด ํ›„๋ณด์ž ๋“ฑ๋ก ๋ฌดํšจ ํšจ๋ ฅ ์ •์ง€ ๊ฐ€์ฒ˜๋ถ„ ์‹ ์ฒญ์„ ์ œ๊ธฐํ–ˆ๊ณ  ๋ฒ•์›์ด ์ด๋ฅผ ๋ฐ›์•„๋“ค์ด๋ฉด์„œ ์„ ๊ฑฐ๊ฐ€ 1์ฐจ๋ก€ ๋ฏธ๋ค„์กŒ๋‹ค. ๋ฒ•์›์€ ๊ธฐ์กด ์„ ๊ฑฐ์šด์˜์œ„์›ํšŒ์˜ ๊ฒฐ์ •์— ์ค‘๋Œ€ํ•œ ์ ˆ์ฐจ์  ํ•˜์ž๊ฐ€ ์žˆ๋Š” ๋งŒํผ ์ž…ํ›„๋ณด ๋ถˆํ—ˆ ์กฐ์ฒ˜์˜ ํšจ๋ ฅ์„ ์ž„์‹œ๋กœ๋ผ๋„ ์ •์ง€ํ•ด์•ผ ํ•œ๋‹ค๊ณ  ํŒ๋‹จํ–ˆ๋‹ค.
ํ›„๋ณด ์ž๊ฒฉ์„ ๋˜์ฐพ์€ ๊น€ํƒ๊ทœ ํšŒ์žฅ์€ ์ž…์žฅ๋ฌธ์„ ํ†ตํ•ด ์„ ๊ฑฐ์šด์˜์œ„์›ํšŒ๋ฅผ ๊ฐ•ํ•˜๊ฒŒ ๋น„ํŒํ–ˆ๋‹ค.
๊น€ ํšŒ์žฅ์€ "์„ ๊ฑฐ์šด์˜์œ„์›ํšŒ๊ฐ€ 23์ผ๋กœ ๋‚ ์งœ๋ฅผ ์žก์€ ๊ฒƒ์€ ์ง€๋‚œ 9์ผ๋ถ€ํ„ฐ ์„ ๊ฑฐ ์šด๋™์— ๋Œ์ž…ํ•œ ์„ธ ํ›„๋ณด์™€ ๋น„๊ตํ•˜๋ฉด (๋‚˜์—๊ฒ) ๋„ˆ๋ฌด๋‚˜ ๋ถˆ๊ณต์ •ํ•œ ๊ฒฐ์ •"์ด๋ผ๊ณ  ์ง€์ ํ–ˆ๋‹ค.
์ด์–ด "๋ฒ•์›์˜ ํŒ๊ฒฐ์„ ๋ฌด์‹œํ•œ ์„ ๊ฑฐ์šด์˜์œ„์›ํšŒ์™€ ์ด๋ฅผ ๋ฐฉ๊ด€ ์ค‘์ธ ํ˜‘ํšŒ๋ฅผ ์ƒ๋Œ€๋กœ ๊ฐ•๋ ฅํ•œ ๋ฒ•์  ์กฐ์น˜์™€ ๋”๋ถˆ์–ด ๋‹ค์‹œ ์„ ๊ฑฐ ์ค‘์ง€ ๊ฐ€์ฒ˜๋ถ„ ์‹ ์ฒญ์„ ํ•˜๋ ค ํ–ˆ๋‹ค. ํ•˜์ง€๋งŒ ๋Œ€ํ•œ๋ฏผ๊ตญ ๋ฐฐ๋“œ๋ฏผํ„ด๊ณผ ์„ ์ˆ˜, ์ง€๋„์ž, ๋™ํ˜ธ์ธ๋“ค์„ ์‚ฌ๋ž‘ํ•˜๋Š” ์‚ฌ๋žŒ์œผ๋กœ์„œ ์ฐจ๋งˆ ๊ทธ๋ ‡๊ฒŒ๊นŒ์ง€ ํ•˜๋ฉด ์•ˆ ๋œ๋‹ค๋Š” ๊ฒฐ๋ก ์„ ๋ƒˆ๋‹ค. ์ด ์‹œ๊ฐ„๋ถ€๋กœ ์„ ๊ฑฐ์šด์˜์œ„์›ํšŒ์˜ ๊ฒฐ์ •์„ ์ˆ˜์šฉํ•˜๊ณ  ์ด๋ฒˆ ์„ ๊ฑฐ์— ์ž„ํ•  ๊ฒƒ"์ด๋ผ๊ณ  ๋ง๋ถ™์˜€๋‹ค.
๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ์žฅ ์„ ๊ฑฐ์—๋Š” ์ตœ์Šนํƒ ์ „ ๋Œ€๊ตฌ๋ฐฐ๋“œ๋ฏผํ„ดํ˜‘ํšŒ์žฅ(ํƒœ์„ฑ์‚ฐ์—… ๋Œ€ํ‘œ), ์ „๊ฒฝํ›ˆ ํ•œ๊ตญ์‹ค์—…๋ฐฐ๋“œ๋ฏผํ„ด์—ฐ๋งน ํšŒ์žฅ(์—ด์ •์ฝ”๋ฆฌ์•„ ๋Œ€ํ‘œ์ด์‚ฌ), ์˜ฌ๋ฆผํ”ฝ ๊ธˆ๋ฉ”๋‹ฌ๋ฆฌ์ŠคํŠธ ์ถœ์‹ ์˜ ๊น€๋™๋ฌธ ์›๊ด‘๋Œ€ ์Šคํฌ์ธ ๊ณผํ•™๋ถ€ ๊ต์ˆ˜๊ฐ€ ํ›„๋ณด๋กœ ๋“ฑ๋กํ–ˆ๋‹ค. ์—ฌ๊ธฐ์— ๊น€ํƒ๊ทœ ํšŒ์žฅ์ด ํ•จ๊ป˜ ํ›„๋ณด๋กœ ์„ ๊ฑฐ๋ฅผ ์น˜๋ฅด๊ฒŒ ๋๋‹ค.
"""
    # NOTE: a second sample article (apparently about NVIDIA's CEO on the
    # US-China AI race) and a placeholder-text check used to sit here as
    # commented-out code. Swap the text above to try a different article.

    result = get_crossref_score_and_reason(test_body)

    print("\n--- ์ตœ์ข… ๊ฒฐ๊ณผ ---")
    print(f"Score: {result['score']}")
    print(f"Reason: {result['reason']}")
    print(f"Recommendation: {result['recommendation']}")
    # The no-keywords result may carry "found_urls" instead of
    # "paired_results"; look both up so this never raises KeyError.
    paired_results = result.get('paired_results', result.get('found_urls', []))
    print(f"Found URLs: {paired_results}")