Vansh-ika00 committed on
Commit
506cefd
·
verified ·
1 Parent(s): 877d4fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -43
app.py CHANGED
@@ -4,16 +4,18 @@ from urllib.parse import urlparse
4
  import csv
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
6
  import torch
7
- #URL model
 
 
8
  url_tokenizer = AutoTokenizer.from_pretrained("najla45/phishing_detection_fine_tuned_bert")
9
  url_model = AutoModelForSequenceClassification.from_pretrained("najla45/phishing_detection_fine_tuned_bert")
10
  url_classifier = pipeline("text-classification", model=url_model, tokenizer=url_tokenizer)
11
 
12
- #email model
13
  email_tokenizer = AutoTokenizer.from_pretrained("cybersectony/phishing-email-detection-distilbert_v2.4.1")
14
  email_model = AutoModelForSequenceClassification.from_pretrained("cybersectony/phishing-email-detection-distilbert_v2.4.1")
15
 
16
- #logic for checking the state of url
17
  def is_phishing_url(url):
18
  suspicious_keywords = ['secure', 'account', 'update', 'free', 'login', 'verify', 'banking']
19
  domain = urlparse(url).netloc
@@ -35,7 +37,7 @@ def is_phishing_url(url):
35
 
36
  return score
37
 
38
- #logic checking for phishing email
39
  def predict_email(email_text):
40
  inputs = email_tokenizer(email_text, return_tensors="pt", truncation=True, max_length=512)
41
  with torch.no_grad():
@@ -52,10 +54,9 @@ def predict_email(email_text):
52
  max_label, max_score = max(labels.items(), key=lambda x: x[1])
53
  return max_label, max_score, labels
54
 
55
-
56
- #LOGGING ALL DATA TO CSV FILE
57
- import os
58
  LOG_FILE = os.path.join(os.path.dirname(__file__), "phishing_log.csv")
 
59
  def log_to_csv(url, rule_score, bert_label, bert_score, final_decision):
60
  try:
61
  file_exists = os.path.isfile(LOG_FILE)
@@ -67,8 +68,7 @@ def log_to_csv(url, rule_score, bert_label, bert_score, final_decision):
67
  except Exception as e:
68
  print(f"Error writing to CSV: {e}")
69
 
70
-
71
- #Combining URL and email checking logic
72
  def combined_phishing_detector(url, input_type, log=True):
73
  if input_type == "URL":
74
  rule_score = is_phishing_url(url)
@@ -86,21 +86,23 @@ def combined_phishing_detector(url, input_type, log=True):
86
  rule_score = "N/A"
87
  rule_result = "Not Applicable"
88
  final_decision = "Phishing" if bert_label.startswith("phishing") and bert_score > 0.7 else "Safe"
 
 
 
 
 
89
 
90
-
91
  if log:
92
  log_to_csv(url, rule_score, bert_label, bert_score, final_decision)
93
 
94
  return url, rule_score, bert_label, bert_score, final_decision
95
 
96
-
97
-
98
  def run_detector(text, input_type):
99
- url,rule_score, bert_label, bert_score,final_decision = combined_phishing_detector(text, input_type,log=True)
100
-
101
  # Add emoji based on result
102
  if final_decision.lower() == "phishing":
103
- emoji = "🚨" # warning
104
  elif final_decision.lower() == "safe":
105
  emoji = "βœ…" # check mark
106
  else:
@@ -112,43 +114,51 @@ def run_detector(text, input_type):
112
  f"πŸ€– BERT Label: {bert_label}\n"
113
  f"πŸ” Confidence: {bert_score:.2f}"
114
  )
115
- return message,LOG_FILE
116
-
117
- #---GUI-----
118
- gr.HTML("""
119
- <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap" rel="stylesheet">
120
- <h1 style='text-align:center; color:white; font-family: "Poppins", sans-serif;'>πŸ” Phishing URL & Email Detector (BERT + Rules) πŸ”</h1>
121
- """)
122
-
123
- with gr.Blocks(css="""
124
- .gradio-container {
125
- background-image: url('https://c8.alamy.com/comp/M79X4X/cyber-security-buzzwords-phishing-alert-with-blue-numbers-in-background-M79X4X.jpg');
126
- background-size: cover;
127
- background-position: center;
128
- background-repeat: no-repeat;
129
- font-family: 'Poppins', sans-serif;
130
- color: white;
131
- }
132
- input, textarea, button, label, .gr-box, .gr-button, .gr-textbox, .gr-radio, .gr-file {
133
- font-family: 'Poppins', sans-serif !important;
134
- color: white;
135
- }
136
- """) as demo:
137
-
138
- gr.HTML("<h1 style='text-align:center; color:white;'>πŸ” Phishing URL & Email Detector (BERT + Rules) πŸ”</h1>")
139
 
140
  with gr.Row():
141
- input_text = gr.Textbox(label="Enter URL or Email", lines=5)
142
  input_type = gr.Radio(["URL", "Email/Message"], label="Input Type")
143
 
144
- result_output = gr.Textbox(label="Detection Result", lines=4, interactive=False)
145
  log_file_output = gr.File(label="Download Log File")
146
 
147
  detect_button = gr.Button("Detect")
148
 
149
- detect_button.click(fn=run_detector, inputs=[input_text, input_type], outputs=[result_output, log_file_output])
 
 
 
 
150
 
151
-
152
  demo.launch(share=True)
153
 
154
 
 
 
 
 
 
 
4
  import csv
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
6
  import torch
7
+ import os
8
+
9
+ # URL model
10
  url_tokenizer = AutoTokenizer.from_pretrained("najla45/phishing_detection_fine_tuned_bert")
11
  url_model = AutoModelForSequenceClassification.from_pretrained("najla45/phishing_detection_fine_tuned_bert")
12
  url_classifier = pipeline("text-classification", model=url_model, tokenizer=url_tokenizer)
13
 
14
+ # Email model
15
  email_tokenizer = AutoTokenizer.from_pretrained("cybersectony/phishing-email-detection-distilbert_v2.4.1")
16
  email_model = AutoModelForSequenceClassification.from_pretrained("cybersectony/phishing-email-detection-distilbert_v2.4.1")
17
 
18
+ # Logic for checking the state of URL
19
  def is_phishing_url(url):
20
  suspicious_keywords = ['secure', 'account', 'update', 'free', 'login', 'verify', 'banking']
21
  domain = urlparse(url).netloc
 
37
 
38
  return score
39
 
40
+ # Logic checking for phishing email
41
  def predict_email(email_text):
42
  inputs = email_tokenizer(email_text, return_tensors="pt", truncation=True, max_length=512)
43
  with torch.no_grad():
 
54
  max_label, max_score = max(labels.items(), key=lambda x: x[1])
55
  return max_label, max_score, labels
56
 
57
+ # Logging all data to CSV file
 
 
58
  LOG_FILE = os.path.join(os.path.dirname(__file__), "phishing_log.csv")
59
+
60
  def log_to_csv(url, rule_score, bert_label, bert_score, final_decision):
61
  try:
62
  file_exists = os.path.isfile(LOG_FILE)
 
68
  except Exception as e:
69
  print(f"Error writing to CSV: {e}")
70
 
71
+ # Combining URL and email checking logic
 
72
  def combined_phishing_detector(url, input_type, log=True):
73
  if input_type == "URL":
74
  rule_score = is_phishing_url(url)
 
86
  rule_score = "N/A"
87
  rule_result = "Not Applicable"
88
  final_decision = "Phishing" if bert_label.startswith("phishing") and bert_score > 0.7 else "Safe"
89
+ else:
90
+ rule_score = "N/A"
91
+ bert_label = "unknown"
92
+ bert_score = 0.0
93
+ final_decision = "Safe"
94
 
 
95
  if log:
96
  log_to_csv(url, rule_score, bert_label, bert_score, final_decision)
97
 
98
  return url, rule_score, bert_label, bert_score, final_decision
99
 
 
 
100
  def run_detector(text, input_type):
101
+ url, rule_score, bert_label, bert_score, final_decision = combined_phishing_detector(text, input_type, log=True)
102
+
103
  # Add emoji based on result
104
  if final_decision.lower() == "phishing":
105
+ emoji = "🚨" # warning
106
  elif final_decision.lower() == "safe":
107
  emoji = "βœ…" # check mark
108
  else:
 
114
  f"πŸ€– BERT Label: {bert_label}\n"
115
  f"πŸ” Confidence: {bert_score:.2f}"
116
  )
117
+ return message, LOG_FILE
118
+
119
+ # ---------- GUI ----------
120
+ with gr.Blocks() as demo:
121
+
122
+ # Custom font + CSS + title (no background image)
123
+ gr.HTML("""
124
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap" rel="stylesheet">
125
+ <style>
126
+ .gradio-container {
127
+ background: radial-gradient(circle at top, #1e293b, #020617);
128
+ background-attachment: fixed;
129
+ font-family: "Poppins", sans-serif;
130
+ color: white;
131
+ }
132
+ .gradio-container * {
133
+ font-family: "Poppins", sans-serif !important;
134
+ }
135
+ label, .gr-textbox, .gr-button, .gr-file {
136
+ color: white !important;
137
+ }
138
+ </style>
139
+ <h1 style='text-align:center; color:white;'>πŸ” Phishing URL & Email Detector (BERT + Rules) πŸ”</h1>
140
+ """)
141
 
142
  with gr.Row():
143
+ input_text = gr.Textbox(label="Enter URL or Email", lines=5, placeholder="Paste URL or email content here...")
144
  input_type = gr.Radio(["URL", "Email/Message"], label="Input Type")
145
 
146
+ result_output = gr.Textbox(label="Detection Result", lines=6, interactive=False)
147
  log_file_output = gr.File(label="Download Log File")
148
 
149
  detect_button = gr.Button("Detect")
150
 
151
+ detect_button.click(
152
+ fn=run_detector,
153
+ inputs=[input_text, input_type],
154
+ outputs=[result_output, log_file_output]
155
+ )
156
 
 
157
  demo.launch(share=True)
158
 
159
 
160
+
161
+
162
+
163
+
164
+