Vansh-ika00 committed on
Commit
9009faa
·
verified ·
1 Parent(s): bafc59a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -154
app.py CHANGED
@@ -1,154 +1,154 @@
1
- import gradio as gr
2
- import re
3
- from urllib.parse import urlparse
4
- import csv
5
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
6
- import torch
7
- #URL model
8
- url_tokenizer = AutoTokenizer.from_pretrained("najla45/phishing_detection_fine_tuned_bert")
9
- url_model = AutoModelForSequenceClassification.from_pretrained("najla45/phishing_detection_fine_tuned_bert")
10
- url_classifier = pipeline("text-classification", model=url_model, tokenizer=url_tokenizer)
11
-
12
- #email model
13
- email_tokenizer = AutoTokenizer.from_pretrained("cybersectony/phishing-email-detection-distilbert_v2.4.1")
14
- email_model = AutoModelForSequenceClassification.from_pretrained("cybersectony/phishing-email-detection-distilbert_v2.4.1")
15
-
16
- #logic for checking the state of url
17
- def is_phishing_url(url):
18
- suspicious_keywords = ['secure', 'account', 'update', 'free', 'login', 'verify', 'banking']
19
- domain = urlparse(url).netloc
20
- path = urlparse(url).path
21
-
22
- score = 0
23
- if re.match(r'https?://\d{1,3}(\.\d{1,3}){3}', url):
24
- score += 2
25
- if '-' in domain:
26
- score += 1
27
- if not url.startswith("https://"):
28
- score += 3
29
- if any(keyword in url.lower() for keyword in suspicious_keywords):
30
- score += 2
31
- if len(url) > 75:
32
- score += 1
33
- if '@' in url:
34
- score += 2
35
-
36
- return score
37
-
38
- #logic checking for phishing email
39
- def predict_email(email_text):
40
- inputs = email_tokenizer(email_text, return_tensors="pt", truncation=True, max_length=512)
41
- with torch.no_grad():
42
- outputs = email_model(**inputs)
43
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()
44
-
45
- labels = {
46
- "legitimate_email": probs[0],
47
- "phishing_url": probs[1],
48
- "legitimate_url": probs[2],
49
- "phishing_url_alt": probs[3]
50
- }
51
-
52
- max_label, max_score = max(labels.items(), key=lambda x: x[1])
53
- return max_label, max_score, labels
54
-
55
-
56
- #LOGGING ALL DATA TO CSV FILE
57
- import os
58
- LOG_FILE = os.path.join(os.path.dirname(__file__), "phishing_log.csv")
59
- def log_to_csv(url, rule_score, bert_label, bert_score, final_decision):
60
- try:
61
- file_exists = os.path.isfile(LOG_FILE)
62
- with open(LOG_FILE, "a", newline='') as f:
63
- writer = csv.writer(f)
64
- if not file_exists:
65
- writer.writerow(["Input", "Rule Score", "BERT Label", "Confidence", "Final Decision"])
66
- writer.writerow([url, rule_score, bert_label, f"{bert_score:.2f}", final_decision])
67
- except Exception as e:
68
- print(f"Error writing to CSV: {e}")
69
-
70
-
71
- #Combining URL and email checking logic
72
- def combined_phishing_detector(url, input_type, log=True):
73
- if input_type == "URL":
74
- rule_score = is_phishing_url(url)
75
- rule_result = "Phishing" if rule_score >= 3 else "Safe"
76
-
77
- bert_result = url_classifier(url)[0]
78
- label_map = {"LABEL_0": "safe", "LABEL_1": "phishing"}
79
- bert_label = label_map.get(bert_result["label"].upper(), "unknown")
80
- bert_score = bert_result["score"]
81
-
82
- final_decision = "Phishing" if rule_result == "Phishing" and bert_label == "phishing" and bert_score > 0.75 else "Safe"
83
-
84
- elif input_type == "Email/Message":
85
- bert_label, bert_score, bert_probs = predict_email(url)
86
- rule_score = "N/A"
87
- rule_result = "Not Applicable"
88
- final_decision = "Phishing" if bert_label.startswith("phishing") and bert_score > 0.7 else "Safe"
89
-
90
- # ✅ This part was missing in your message
91
- if log:
92
- log_to_csv(url, rule_score, bert_label, bert_score, final_decision)
93
-
94
- return url, rule_score, bert_label, bert_score, final_decision
95
-
96
-
97
-
98
- def run_detector(text, input_type):
99
- url,rule_score, bert_label, bert_score,final_decision = combined_phishing_detector(text, input_type,log=True)
100
-
101
- # Add emoji based on result
102
- if final_decision.lower() == "phishing":
103
- emoji = "🚨" # warning
104
- elif final_decision.lower() == "safe":
105
- emoji = "βœ…" # check mark
106
- else:
107
- emoji = "❓"
108
-
109
- message = (
110
- f"{emoji} Result: {final_decision}\n"
111
- f"πŸ“Š Rule Score: {rule_score}\n"
112
- f"πŸ€– BERT Label: {bert_label}\n"
113
- f"πŸ” Confidence: {bert_score:.2f}"
114
- )
115
- return message,LOG_FILE
116
-
117
- #---GUI-----
118
- gr.HTML("""
119
- <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap" rel="stylesheet">
120
- <h1 style='text-align:center; color:white; font-family: "Poppins", sans-serif;'>πŸ” Phishing URL & Email Detector (BERT + Rules) πŸ”</h1>
121
- """)
122
-
123
- with gr.Blocks(css="""
124
- .gradio-container {
125
- background-image: url('https://c8.alamy.com/comp/M79X4X/cyber-security-buzzwords-phishing-alert-with-blue-numbers-in-background-M79X4X.jpg');
126
- background-size: cover;
127
- background-position: center;
128
- background-repeat: no-repeat;
129
- font-family: 'Poppins', sans-serif;
130
- color: white;
131
- }
132
- input, textarea, button, label, .gr-box, .gr-button, .gr-textbox, .gr-radio, .gr-file {
133
- font-family: 'Poppins', sans-serif !important;
134
- color: white;
135
- }
136
- """) as demo:
137
-
138
- gr.HTML("<h1 style='text-align:center; color:white;'>πŸ” Phishing URL & Email Detector (BERT + Rules) πŸ”</h1>")
139
-
140
- with gr.Row():
141
- input_text = gr.Textbox(label="Enter URL or Email", lines=5)
142
- input_type = gr.Radio(["URL", "Email/Message"], label="Input Type")
143
-
144
- result_output = gr.Textbox(label="Detection Result", lines=4, interactive=False)
145
- log_file_output = gr.File(label="Download Log File")
146
-
147
- detect_button = gr.Button("Detect")
148
-
149
- detect_button.click(fn=run_detector, inputs=[input_text, input_type], outputs=[result_output, log_file_output])
150
-
151
-
152
- demo.launch(share=True)
153
-
154
-
 
1
+ import gradio as gr
2
+ import re
3
+ from urllib.parse import urlparse
4
+ import csv
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
6
+ import torch
7
# Hugging Face Hub identifiers of the two pretrained classifiers.
URL_MODEL_ID = "najla45/phishing_detection_fine_tuned_bert"
EMAIL_MODEL_ID = "cybersectony/phishing-email-detection-distilbert_v2.4.1"

# URL model: tokenizer and weights wrapped in a text-classification pipeline.
url_tokenizer = AutoTokenizer.from_pretrained(URL_MODEL_ID)
url_model = AutoModelForSequenceClassification.from_pretrained(URL_MODEL_ID)
url_classifier = pipeline(
    "text-classification",
    model=url_model,
    tokenizer=url_tokenizer,
)

# Email model: used directly (no pipeline) so per-class probabilities can be read.
email_tokenizer = AutoTokenizer.from_pretrained(EMAIL_MODEL_ID)
email_model = AutoModelForSequenceClassification.from_pretrained(EMAIL_MODEL_ID)
15
+
16
+ #logic for checking the state of url
17
def is_phishing_url(url):
    """Score a URL against a handful of phishing heuristics.

    Each rule that fires adds to the score:

    * +2  the URL starts with ``http(s)://`` followed by a dotted-quad IP
    * +1  the network location contains a hyphen
    * +3  the URL does not start with ``https://``
    * +2  the URL contains one of the suspicious keywords
    * +1  the URL is longer than 75 characters
    * +2  the URL contains ``@``

    Leading/trailing whitespace is ignored, and the scheme and keyword
    checks are case-insensitive, so ``HTTPS://example.com`` is not
    penalised as a non-HTTPS URL.

    Args:
        url: The URL text to inspect. ``None`` is treated as an empty string.

    Returns:
        The accumulated integer score; 0 means no rule fired.
    """
    suspicious_keywords = ('secure', 'account', 'update', 'free', 'login', 'verify', 'banking')

    # The UI textbox is multi-line, so pasted input may carry stray whitespace.
    candidate = (url or "").strip()
    # URL schemes are case-insensitive; compare against a lowered copy.
    lowered = candidate.lower()
    domain = urlparse(candidate).netloc

    score = 0
    if re.match(r'https?://\d{1,3}(\.\d{1,3}){3}', lowered):
        score += 2
    if '-' in domain:
        score += 1
    if not lowered.startswith("https://"):
        score += 3
    if any(keyword in lowered for keyword in suspicious_keywords):
        score += 2
    if len(candidate) > 75:
        score += 1
    if '@' in candidate:
        score += 2

    return score
37
+
38
+ #logic checking for phishing email
39
def predict_email(email_text):
    """Classify a message with the email model.

    The text is tokenized (truncated to 512 tokens), run through
    ``email_model`` without gradient tracking, and the logits of the first
    (only) sequence are turned into probabilities with softmax.

    Args:
        email_text: The email or message body to classify.

    Returns:
        A tuple ``(label, probability, all_probabilities)`` where ``label``
        is the highest-probability class name, ``probability`` is its
        score, and ``all_probabilities`` maps every class name to its score.
    """
    # Output index i of the model is reported under the i-th name below.
    class_names = (
        "legitimate_email",
        "phishing_url",
        "legitimate_url",
        "phishing_url_alt",
    )

    encoded = email_tokenizer(email_text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = email_model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)[0].tolist()

    labels = {name: probabilities[index] for index, name in enumerate(class_names)}

    # On ties the first class in insertion order wins.
    best_label = max(labels, key=labels.get)
    return best_label, labels[best_label], labels
54
+
55
+
56
+ #LOGGING ALL DATA TO CSV FILE
57
import os

# The log file is stored in the same directory as this script.
LOG_FILE = os.path.join(os.path.dirname(__file__), "phishing_log.csv")


def log_to_csv(url, rule_score, bert_label, bert_score, final_decision):
    """Append one detection result to the CSV log.

    A header row is written first whenever the file is empty (newly created
    or previously truncated). Logging is best-effort: any failure is
    printed and swallowed so that a logging problem never breaks detection.

    Args:
        url: The analysed input (URL or message text).
        rule_score: Rule-based score, or a placeholder such as ``"N/A"``.
        bert_label: Label predicted by the model.
        bert_score: Model confidence; written with two decimals.
        final_decision: The combined verdict.
    """
    try:
        # Explicit UTF-8 so non-ASCII input does not depend on the platform
        # default encoding (a failed encode would silently drop the row).
        with open(LOG_FILE, "a", newline='', encoding="utf-8") as f:
            writer = csv.writer(f)
            # Append mode positions the stream at end-of-file, so offset 0
            # means the file is empty and still needs its header.
            if f.tell() == 0:
                writer.writerow(["Input", "Rule Score", "BERT Label", "Confidence", "Final Decision"])
            writer.writerow([url, rule_score, bert_label, f"{bert_score:.2f}", final_decision])
    except Exception as e:
        # Deliberately broad: logging must never take the detector down.
        print(f"Error writing to CSV: {e}")
69
+
70
+
71
+ #Combining URL and email checking logic
72
def combined_phishing_detector(url, input_type, log=True):
    """Run the detector that matches ``input_type`` and return its verdict.

    For ``"URL"`` the rule-based score and the URL classifier must agree:
    the verdict is "Phishing" only when the rule score is at least 3, the
    classifier label maps to "phishing", and its confidence exceeds 0.75.
    For ``"Email/Message"`` only the email model is used: the verdict is
    "Phishing" when the top label starts with "phishing" and its
    probability exceeds 0.7; the rule score is reported as ``"N/A"``.

    Args:
        url: The text to analyse (a URL or an email/message body).
        input_type: Either ``"URL"`` or ``"Email/Message"``.
        log: When true, append the result to the CSV log.

    Returns:
        A tuple ``(url, rule_score, bert_label, bert_score, final_decision)``.

    Raises:
        ValueError: If ``input_type`` is neither supported value (for
            example ``None`` when nothing was selected in the UI).
    """
    if input_type == "URL":
        rule_score = is_phishing_url(url)

        bert_result = url_classifier(url)[0]
        label_map = {"LABEL_0": "safe", "LABEL_1": "phishing"}
        bert_label = label_map.get(bert_result["label"].upper(), "unknown")
        bert_score = bert_result["score"]

        # Both signals must agree before the URL is flagged.
        flagged = rule_score >= 3 and bert_label == "phishing" and bert_score > 0.75

    elif input_type == "Email/Message":
        bert_label, bert_score, _ = predict_email(url)
        rule_score = "N/A"  # the URL heuristics do not apply to free text
        flagged = bert_label.startswith("phishing") and bert_score > 0.7

    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError; fail early with an explicit message instead.
        raise ValueError(
            f"Unsupported input_type {input_type!r}; expected 'URL' or 'Email/Message'"
        )

    final_decision = "Phishing" if flagged else "Safe"

    if log:
        log_to_csv(url, rule_score, bert_label, bert_score, final_decision)

    return url, rule_score, bert_label, bert_score, final_decision
95
+
96
+
97
+
98
def run_detector(text, input_type):
    """UI callback: analyse ``text`` and build the result shown to the user.

    Args:
        text: Content of the input textbox (URL or email/message).
        input_type: Selected radio value, ``"URL"`` or ``"Email/Message"``;
            ``None`` when nothing has been selected.

    Returns:
        A tuple ``(message, log_path)``. ``message`` is the formatted result
        (or a prompt when the input is incomplete). ``log_path`` is the CSV
        log path, or ``None`` when there is no log file to offer.
    """
    # Guard against incomplete input instead of letting the detector crash.
    if not text or not text.strip():
        return "❓ Please enter a URL or an email/message to check.", None
    if input_type not in ("URL", "Email/Message"):
        return "❓ Please select an input type (URL or Email/Message).", None

    _, rule_score, bert_label, bert_score, final_decision = combined_phishing_detector(
        text, input_type, log=True
    )

    # Pick an emoji for the verdict; anything unexpected gets a question mark.
    emoji = {"phishing": "🚨", "safe": "✅"}.get(final_decision.lower(), "❓")

    # NOTE(review): the glyph before "Confidence" was truncated in the source
    # this was recovered from; the magnifying glass is a best guess — confirm.
    message = (
        f"{emoji} Result: {final_decision}\n"
        f"📊 Rule Score: {rule_score}\n"
        f"🤖 BERT Label: {bert_label}\n"
        f"🔍 Confidence: {bert_score:.2f}"
    )

    # Logging is best-effort, so the file may not exist; only offer it when it does.
    log_path = LOG_FILE if os.path.isfile(LOG_FILE) else None
    return message, log_path
116
+
117
+ #---GUI-----
118
# Page styling: full-page background image and white Poppins text on the
# container and the common form controls.
with gr.Blocks(css="""
.gradio-container {
    background-image: url('https://c8.alamy.com/comp/M79X4X/cyber-security-buzzwords-phishing-alert-with-blue-numbers-in-background-M79X4X.jpg');
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    font-family: 'Poppins', sans-serif;
    color: white;
}
input, textarea, button, label, .gr-box, .gr-button, .gr-textbox, .gr-radio, .gr-file {
    font-family: 'Poppins', sans-serif !important;
    color: white;
}
""") as demo:

    # The font <link> and the heading must be created inside the Blocks
    # context: a component built outside it is never rendered, so the
    # Poppins stylesheet was not being loaded. The two duplicate headings
    # are merged into this single one.
    # NOTE(review): the heading glyph was truncated in the source this was
    # recovered from; the magnifying glass is a best guess — confirm.
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap" rel="stylesheet">
    <h1 style='text-align:center; color:white; font-family: "Poppins", sans-serif;'>🔍 Phishing URL & Email Detector (BERT + Rules) 🔍</h1>
    """)

    # Inputs side by side: the text to analyse and which detector to use.
    with gr.Row():
        input_text = gr.Textbox(label="Enter URL or Email", lines=5)
        input_type = gr.Radio(["URL", "Email/Message"], label="Input Type")

    # Outputs: the formatted verdict and the downloadable CSV log.
    result_output = gr.Textbox(label="Detection Result", lines=4, interactive=False)
    log_file_output = gr.File(label="Download Log File")

    detect_button = gr.Button("Detect")

    detect_button.click(fn=run_detector, inputs=[input_text, input_type], outputs=[result_output, log_file_output])


demo.launch(share=True)
153
+
154
+