# Source snapshot: revision d32abd5, file size 2,668 bytes.
import streamlit as st
import re
from urllib.parse import urlparse
import csv
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
# Load the model once per server process.
# Streamlit re-executes this script from the top on every widget interaction,
# so plain module-level loading would repeat the expensive load on each rerun.
# Wrapping the load in a cached resource makes "once at startup" actually hold.
model_name = "najla45/phishing_detection_fine_tuned_bert"


@st.cache_resource(show_spinner=False)
def _load_phishing_model(name):
    """Load the tokenizer, model and text-classification pipeline for *name*.

    Args:
        name: Hugging Face model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple. The result is cached by
        Streamlit, so repeated script reruns reuse the same objects.
    """
    # show_spinner=False: avoid emitting any UI element from here, because
    # st.set_page_config is called later in the script.
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    classifier = pipeline(
        "text-classification", model=loaded_model, tokenizer=loaded_tokenizer
    )
    return loaded_tokenizer, loaded_model, classifier


tokenizer, model, bert_classifier = _load_phishing_model(model_name)
def is_phishing_url(url):
    """Return a heuristic phishing score for *url* (higher is more suspicious).

    Despite the ``is_`` prefix, this returns an integer score, not a boolean.
    Points are added independently for each indicator:

    * +2  the host is a raw IPv4 address
    * +1  the network location contains a hyphen
    * +3  the URL does not start with ``https://``
    * +2  the URL contains one of the suspicious keywords
    * +1  the URL is longer than 75 characters
    * +2  the URL contains ``@``

    Scheme and keyword checks are case-insensitive, so ``HTTPS://...`` is
    treated the same as ``https://...``.

    Args:
        url: The URL string to score.

    Returns:
        The total score as an ``int`` (0 when no indicator fires).
    """
    suspicious_keywords = (
        'secure', 'account', 'update', 'free', 'login', 'verify', 'banking',
    )
    lowered = url.lower()

    try:
        domain = urlparse(url).netloc
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. unbalanced "[" / "]").
        # Treat the host as unknown instead of crashing; the remaining
        # string-based checks still apply.
        domain = ""

    score = 0
    if re.match(r'https?://\d{1,3}(\.\d{1,3}){3}', lowered):
        score += 2
    if '-' in domain:
        score += 1
    if not lowered.startswith("https://"):
        score += 3
    if any(keyword in lowered for keyword in suspicious_keywords):
        score += 2
    if len(url) > 75:
        score += 1
    if '@' in url:
        score += 2
    return score
def log_to_csv(input_text, rule_score, bert_label, bert_score, final_decision):
    """Append one detection record to ``phishing_log.csv``.

    The file is opened in append mode relative to the current working
    directory and is created if it does not exist. No header row is written.

    Args:
        input_text: The URL or message that was analysed.
        rule_score: Heuristic score for the input.
        bert_label: Label derived from the classifier output.
        bert_score: Classifier confidence; written with two decimal places.
        final_decision: The combined verdict string.
    """
    # Explicit UTF-8: pasted messages may contain non-ASCII text, and the
    # platform default encoding is not guaranteed to be able to represent it.
    with open("phishing_log.csv", "a", newline='', encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(
            [input_text, rule_score, bert_label, f"{bert_score:.2f}", final_decision]
        )
def combined_phishing_detector(url):
    """Classify *url* (a URL or a free-form message) as "Phishing" or "Safe".

    Two signals are combined:

    * the heuristic score from ``is_phishing_url`` (only for inputs that
      start with ``http://`` or ``https://``; a score of 3 or more counts as
      suspicious), and
    * the ``bert_classifier`` prediction (counts as suspicious when the
      mapped label is "phishing" with a score above 0.75).

    For URL input both signals must agree before "Phishing" is returned.
    For non-URL input the heuristic does not apply, so the classifier alone
    decides; otherwise a pasted message could never be flagged.

    Every call is recorded through ``log_to_csv``.

    Args:
        url: The text entered by the user.

    Returns:
        ``"Phishing"`` or ``"Safe"``.
    """
    # Require a full scheme prefix so ordinary words such as "httpd" are not
    # mistaken for URLs; the scheme comparison is case-insensitive.
    is_url = url.lower().startswith(("http://", "https://"))
    rule_score = is_phishing_url(url) if is_url else 0

    # truncation=True is forwarded to the tokenizer so long messages are cut
    # to the model's maximum input length instead of failing.
    bert_result = bert_classifier(url, truncation=True)[0]
    label_map = {"LABEL_0": "safe", "LABEL_1": "phishing"}
    bert_label = label_map.get(bert_result["label"].upper(), "unknown")
    bert_score = bert_result["score"]

    bert_flags_phishing = bert_label == "phishing" and bert_score > 0.75
    if is_url:
        flagged = rule_score >= 3 and bert_flags_phishing
    else:
        flagged = bert_flags_phishing
    final_decision = "Phishing" if flagged else "Safe"

    log_to_csv(url, rule_score, bert_label, bert_score, final_decision)
    return final_decision
# ---------------- STREAMLIT UI ----------------
# NOTE(review): the emoji literals in this section were garbled (mojibake) in
# the reviewed copy, and one of them split an f-string across two lines.
# The alert, check-mark and warning emoji are restored from the byte pattern;
# the page/title icon could not be recovered exactly — confirm the intended one.
st.set_page_config(page_title="Phishing Detector", page_icon="🔐")
st.title("🔐 Phishing URL & Message Detector")
user_input = st.text_area("Paste a URL or email message below:")

if st.button("Check"):
    cleaned_input = user_input.strip()
    if cleaned_input:
        result = combined_phishing_detector(cleaned_input)
        if result == "Phishing":
            st.error(f"🚨 Detected as: {result}")
        else:
            st.success(f"✅ Detected as: {result}")
    else:
        # Whitespace-only input: nothing to analyse.
        st.warning("⚠️ Please enter a valid URL or message.")