MUFASA25 committed on
Commit
2747353
·
verified ·
1 Parent(s): 09b3e31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -93
app.py CHANGED
@@ -14,12 +14,12 @@ MODEL_NAME = "cybersectony/phishing-email-detection-distilbert_v2.4.1"
14
  # Explanation of labels and their values
15
  """
16
  Labels and Their Meanings:
17
- - Legitimate: The email appears safe and is likely from a trusted source.
18
- - Phishing: The email may be a scam attempting to steal personal information.
19
- - Suspicious: The email has questionable content and may not be safe.
20
- - Spam: The email is likely unwanted promotional or junk content.
21
- Each label comes with a percentage (0-100%) indicating the model's confidence.
22
- Higher percentages mean the model is more certain of the classification.
23
  """
24
 
25
  # Global variables for model and tokenizer
@@ -44,13 +44,23 @@ def is_valid_email_text(text):
44
  return False, "Please enter some email text."
45
  if len(text.strip()) < 10:
46
  return False, "Text too short for analysis."
47
- # Check for basic email-like structure or meaningful words
48
  if len(text.split()) < 3 or not re.search(r"[a-zA-Z]{3,}", text):
49
  return False, "Text appears incoherent or not email-like."
50
  return True, ""
51
 
 
 
 
 
 
 
 
 
 
 
 
52
  def predict_email(email_text):
53
- """Simplified prediction function with clear output"""
54
  # Input validation
55
  valid, message = is_valid_email_text(email_text)
56
  if not valid:
@@ -76,7 +86,7 @@ def predict_email(email_text):
76
  outputs = model(**inputs)
77
  probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()
78
 
79
- # Get labels from model config or fallback
80
  labels = (model.config.id2label if hasattr(model.config, 'id2label') and model.config.id2label
81
  else {0: "Legitimate", 1: "Phishing", 2: "Suspicious", 3: "Spam"} if len(probs) == 4
82
  else {0: "Legitimate", 1: "Phishing"})
@@ -87,87 +97,5 @@ def predict_email(email_text):
87
  # Get top prediction
88
  max_label, max_prob = max(results.items(), key=lambda x: x[1])
89
 
90
- # Simplified risk levels
91
- if "phishing" in max_label.lower() or "suspicious" in max_label.lower():
92
- risk_level = "⚠️ Risky" if max_prob > 60 else "⚡ Low Risk"
93
- elif "spam" in max_label.lower():
94
- risk_level = "🗑️ Spam"
95
- else:
96
- risk_level = "✅ Safe" if max_prob > 60 else "❓ Uncertain"
97
-
98
- # Format output
99
- output = f"Result: {risk_level}\n"
100
- output += f"Top Prediction: {max_label} ({max_prob:.1f}%)\n"
101
- output += "Details:\n"
102
- for label, prob in sorted(results.items(), key=lambda x: x[1], reverse=True):
103
- output += f"{label}: {prob:.1f}%\n"
104
-
105
- # Simple recommendation
106
- if "phishing" in max_label.lower() or "suspicious" in max_label.lower():
107
- output += "Advice: Avoid clicking links or sharing info."
108
- elif "spam" in max_label.lower():
109
- output += "Advice: Mark as spam or delete."
110
- else:
111
- output += "Advice: Appears safe, but stay cautious."
112
-
113
- return output
114
-
115
- except Exception as e:
116
- logger.error(f"Error during prediction: {e}")
117
- return f"❌ Error: Analysis failed - {str(e)}"
118
-
119
- # Example emails
120
- example_legitimate = """Dear Customer,
121
- Thank you for your purchase from TechStore. Your order #ORD-2024-001234 is processed.
122
- Order Details:
123
- - Product: Wireless Headphones
124
- - Amount: $79.99
125
- - Delivery: 3-5 days
126
- Best regards,
127
- TechStore"""
128
- example_phishing = """URGENT!!!
129
- Your account is COMPROMISED! Click here to secure: http://fake-site.com/verify
130
- Act NOW or your account will be suspended!
131
- Security Team"""
132
- example_neutral = """Hi team,
133
- Reminder: meeting today at 10 PM. Bring project updates.
134
- Thanks,
135
- Byabato"""
136
-
137
- # Load model on startup
138
- load_model()
139
-
140
- # Minimalist Gradio interface
141
- with gr.Blocks(title="PhishGuardian", theme=gr.themes.Soft()) as iface:
142
- gr.Markdown("# 🛡️ PhishGuardian\nSimple email safety checker.\n\nCheck if an email is safe or risky. Paste the email text and click 'Check'.")
143
-
144
- with gr.Row():
145
- with gr.Column(scale=2):
146
- email_input = gr.Textbox(
147
- lines=8,
148
- placeholder="Paste email here...",
149
- label="📧 Email"
150
- )
151
- with gr.Row():
152
- analyze_btn = gr.Button("🔍 Check", variant="primary")
153
- clear_btn = gr.Button("🗑️ Clear")
154
-
155
- with gr.Column(scale=2):
156
- output = gr.Textbox(
157
- label="✅ Results",
158
- lines=10,
159
- interactive=False,
160
- show_copy_button=True
161
- )
162
-
163
- gr.Markdown("### 📝 Examples")
164
- with gr.Row():
165
- gr.Button("✅ Legitimate", size="sm").click(lambda: example_legitimate, outputs=email_input)
166
- gr.Button("🚨 Phishing", size="sm").click(lambda: example_phishing, outputs=email_input)
167
- gr.Button("📄 Neutral", size="sm").click(lambda: example_neutral, outputs=email_input)
168
-
169
- analyze_btn.click(predict_email, inputs=email_input, outputs=output)
170
- clear_btn.click(lambda: ("", ""), outputs=[email_input, output])
171
-
172
- if __name__ == "__main__":
173
- iface.launch(server_port=7860, show_error=True)
 
14
  # Explanation of labels and their values
15
  """
16
  Labels and Their Meanings:
17
+ - Legitimate: The email is safe and likely from a trusted source.
18
+ - Phishing: The email is a scam attempting to steal personal information.
19
+ - Suspicious: The email has questionable content and may be unsafe.
20
+ - Spam: The email is unwanted promotional or junk content.
21
+ Each label has a percentage (0-100%) showing the model's confidence.
22
+ Higher percentages indicate greater certainty.
23
  """
24
 
25
  # Global variables for model and tokenizer
 
44
  return False, "Please enter some email text."
45
  if len(text.strip()) < 10:
46
  return False, "Text too short for analysis."
 
47
  if len(text.split()) < 3 or not re.search(r"[a-zA-Z]{3,}", text):
48
  return False, "Text appears incoherent or not email-like."
49
  return True, ""
50
 
51
+ def get_colored_bar(percentage):
52
+ """Create a simple colored bar based on percentage"""
53
+ if percentage >= 85:
54
+ color = "🟢" if percentage >= 85 else "🟥"
55
+ elif percentage >= 50:
56
+ color = "🟡"
57
+ else:
58
+ color = "⚪"
59
+ bar_length = max(1, int(percentage / 5)) # Scale to 20 characters
60
+ return color * bar_length + "⚪" * (20 - bar_length)
61
+
62
  def predict_email(email_text):
63
+ """Simplified prediction with actual labels and colored bars"""
64
  # Input validation
65
  valid, message = is_valid_email_text(email_text)
66
  if not valid:
 
86
  outputs = model(**inputs)
87
  probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()
88
 
89
+ # Define actual labels (handle 2 or 4 classes)
90
  labels = (model.config.id2label if hasattr(model.config, 'id2label') and model.config.id2label
91
  else {0: "Legitimate", 1: "Phishing", 2: "Suspicious", 3: "Spam"} if len(probs) == 4
92
  else {0: "Legitimate", 1: "Phishing"})
 
97
  # Get top prediction
98
  max_label, max_prob = max(results.items(), key=lambda x: x[1])
99
 
100
+ # Risk levels with higher threshold (85%)
101
+ if "phishing" in max_label.lower() or "suspicious" in max_label.lower():
[Diff truncated here: output was killed by the server before completion; the remainder of the change is not shown.]