Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,12 +14,12 @@ MODEL_NAME = "cybersectony/phishing-email-detection-distilbert_v2.4.1"
|
|
| 14 |
# Explanation of labels and their values
|
| 15 |
"""
|
| 16 |
Labels and Their Meanings:
|
| 17 |
-
- Legitimate: The email
|
| 18 |
-
- Phishing: The email
|
| 19 |
-
- Suspicious: The email has questionable content and may
|
| 20 |
-
- Spam: The email is
|
| 21 |
-
Each label
|
| 22 |
-
Higher percentages
|
| 23 |
"""
|
| 24 |
|
| 25 |
# Global variables for model and tokenizer
|
|
@@ -44,13 +44,23 @@ def is_valid_email_text(text):
|
|
| 44 |
return False, "Please enter some email text."
|
| 45 |
if len(text.strip()) < 10:
|
| 46 |
return False, "Text too short for analysis."
|
| 47 |
-
# Check for basic email-like structure or meaningful words
|
| 48 |
if len(text.split()) < 3 or not re.search(r"[a-zA-Z]{3,}", text):
|
| 49 |
return False, "Text appears incoherent or not email-like."
|
| 50 |
return True, ""
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def predict_email(email_text):
|
| 53 |
-
"""Simplified prediction
|
| 54 |
# Input validation
|
| 55 |
valid, message = is_valid_email_text(email_text)
|
| 56 |
if not valid:
|
|
@@ -76,7 +86,7 @@ def predict_email(email_text):
|
|
| 76 |
outputs = model(**inputs)
|
| 77 |
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()
|
| 78 |
|
| 79 |
-
#
|
| 80 |
labels = (model.config.id2label if hasattr(model.config, 'id2label') and model.config.id2label
|
| 81 |
else {0: "Legitimate", 1: "Phishing", 2: "Suspicious", 3: "Spam"} if len(probs) == 4
|
| 82 |
else {0: "Legitimate", 1: "Phishing"})
|
|
@@ -87,87 +97,5 @@ def predict_email(email_text):
|
|
| 87 |
# Get top prediction
|
| 88 |
max_label, max_prob = max(results.items(), key=lambda x: x[1])
|
| 89 |
|
| 90 |
-
#
|
| 91 |
-
if "phishing" in max_label.lower() or "suspicious" in
|
| 92 |
-
risk_level = "⚠️ Risky" if max_prob > 60 else "⚡ Low Risk"
|
| 93 |
-
elif "spam" in max_label.lower():
|
| 94 |
-
risk_level = "🗑️ Spam"
|
| 95 |
-
else:
|
| 96 |
-
risk_level = "✅ Safe" if max_prob > 60 else "❓ Uncertain"
|
| 97 |
-
|
| 98 |
-
# Format output
|
| 99 |
-
output = f"Result: {risk_level}\n"
|
| 100 |
-
output += f"Top Prediction: {max_label} ({max_prob:.1f}%)\n"
|
| 101 |
-
output += "Details:\n"
|
| 102 |
-
for label, prob in sorted(results.items(), key=lambda x: x[1], reverse=True):
|
| 103 |
-
output += f"{label}: {prob:.1f}%\n"
|
| 104 |
-
|
| 105 |
-
# Simple recommendation
|
| 106 |
-
if "phishing" in max_label.lower() or "suspicious" in max_label.lower():
|
| 107 |
-
output += "Advice: Avoid clicking links or sharing info."
|
| 108 |
-
elif "spam" in max_label.lower():
|
| 109 |
-
output += "Advice: Mark as spam or delete."
|
| 110 |
-
else:
|
| 111 |
-
output += "Advice: Appears safe, but stay cautious."
|
| 112 |
-
|
| 113 |
-
return output
|
| 114 |
-
|
| 115 |
-
except Exception as e:
|
| 116 |
-
logger.error(f"Error during prediction: {e}")
|
| 117 |
-
return f"❌ Error: Analysis failed - {str(e)}"
|
| 118 |
-
|
| 119 |
-
# Example emails
|
| 120 |
-
example_legitimate = """Dear Customer,
|
| 121 |
-
Thank you for your purchase from TechStore. Your order #ORD-2024-001234 is processed.
|
| 122 |
-
Order Details:
|
| 123 |
-
- Product: Wireless Headphones
|
| 124 |
-
- Amount: $79.99
|
| 125 |
-
- Delivery: 3-5 days
|
| 126 |
-
Best regards,
|
| 127 |
-
TechStore"""
|
| 128 |
-
example_phishing = """URGENT!!!
|
| 129 |
-
Your account is COMPROMISED! Click here to secure: http://fake-site.com/verify
|
| 130 |
-
Act NOW or your account will be suspended!
|
| 131 |
-
Security Team"""
|
| 132 |
-
example_neutral = """Hi team,
|
| 133 |
-
Reminder: meeting today at 10 PM. Bring project updates.
|
| 134 |
-
Thanks,
|
| 135 |
-
Byabato"""
|
| 136 |
-
|
| 137 |
-
# Load model on startup
|
| 138 |
-
load_model()
|
| 139 |
-
|
| 140 |
-
# Minimalist Gradio interface
|
| 141 |
-
with gr.Blocks(title="PhishGuardian", theme=gr.themes.Soft()) as iface:
|
| 142 |
-
gr.Markdown("# 🛡️ PhishGuardian\nSimple email safety checker.\n\nCheck if an email is safe or risky. Paste the email text and click 'Check'.")
|
| 143 |
-
|
| 144 |
-
with gr.Row():
|
| 145 |
-
with gr.Column(scale=2):
|
| 146 |
-
email_input = gr.Textbox(
|
| 147 |
-
lines=8,
|
| 148 |
-
placeholder="Paste email here...",
|
| 149 |
-
label="📧 Email"
|
| 150 |
-
)
|
| 151 |
-
with gr.Row():
|
| 152 |
-
analyze_btn = gr.Button("🔍 Check", variant="primary")
|
| 153 |
-
clear_btn = gr.Button("🗑️ Clear")
|
| 154 |
-
|
| 155 |
-
with gr.Column(scale=2):
|
| 156 |
-
output = gr.Textbox(
|
| 157 |
-
label="✅ Results",
|
| 158 |
-
lines=10,
|
| 159 |
-
interactive=False,
|
| 160 |
-
show_copy_button=True
|
| 161 |
-
)
|
| 162 |
-
|
| 163 |
-
gr.Markdown("### 📝 Examples")
|
| 164 |
-
with gr.Row():
|
| 165 |
-
gr.Button("✅ Legitimate", size="sm").click(lambda: example_legitimate, outputs=email_input)
|
| 166 |
-
gr.Button("🚨 Phishing", size="sm").click(lambda: example_phishing, outputs=email_input)
|
| 167 |
-
gr.Button("📄 Neutral", size="sm").click(lambda: example_neutral, outputs=email_input)
|
| 168 |
-
|
| 169 |
-
analyze_btn.click(predict_email, inputs=email_input, outputs=output)
|
| 170 |
-
clear_btn.click(lambda: ("", ""), outputs=[email_input, output])
|
| 171 |
-
|
| 172 |
-
if __name__ == "__main__":
|
| 173 |
-
iface.launch(server_port=7860, show_error=True)
|
|
|
|
| 14 |
# Explanation of labels and their values
|
| 15 |
"""
|
| 16 |
Labels and Their Meanings:
|
| 17 |
+
- Legitimate: The email is safe and likely from a trusted source.
|
| 18 |
+
- Phishing: The email is a scam attempting to steal personal information.
|
| 19 |
+
- Suspicious: The email has questionable content and may be unsafe.
|
| 20 |
+
- Spam: The email is unwanted promotional or junk content.
|
| 21 |
+
Each label has a percentage (0-100%) showing the model's confidence.
|
| 22 |
+
Higher percentages indicate greater certainty.
|
| 23 |
"""
|
| 24 |
|
| 25 |
# Global variables for model and tokenizer
|
|
|
|
| 44 |
return False, "Please enter some email text."
|
| 45 |
if len(text.strip()) < 10:
|
| 46 |
return False, "Text too short for analysis."
|
|
|
|
| 47 |
if len(text.split()) < 3 or not re.search(r"[a-zA-Z]{3,}", text):
|
| 48 |
return False, "Text appears incoherent or not email-like."
|
| 49 |
return True, ""
|
| 50 |
|
| 51 |
+
def get_colored_bar(percentage):
    """Render a percentage as a fixed-width bar of 20 emoji cells.

    The filled cells use a color chosen from the percentage:
    green at 85 and above, yellow from 50 up to (but not including) 85,
    and white below 50. Unfilled cells are always white, so a bar for a
    value below 50 is entirely white.

    Args:
        percentage: Numeric value, expected in the range 0-100.

    Returns:
        A string of exactly 20 emoji characters.
    """
    if percentage >= 85:
        color = "🟢"
    elif percentage >= 50:
        color = "🟡"
    else:
        color = "⚪"
    # One cell per 5 percentage points. At least one cell is always drawn,
    # and the count is capped at 20 so that an out-of-range value (> 100)
    # cannot make the bar longer than its fixed width.
    bar_length = min(20, max(1, int(percentage / 5)))
    return color * bar_length + "⚪" * (20 - bar_length)
|
| 61 |
+
|
| 62 |
def predict_email(email_text):
|
| 63 |
+
"""Simplified prediction with actual labels and colored bars"""
|
| 64 |
# Input validation
|
| 65 |
valid, message = is_valid_email_text(email_text)
|
| 66 |
if not valid:
|
|
|
|
| 86 |
outputs = model(**inputs)
|
| 87 |
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()
|
| 88 |
|
| 89 |
+
# Define actual labels (handle 2 or 4 classes)
|
| 90 |
labels = (model.config.id2label if hasattr(model.config, 'id2label') and model.config.id2label
|
| 91 |
else {0: "Legitimate", 1: "Phishing", 2: "Suspicious", 3: "Spam"} if len(probs) == 4
|
| 92 |
else {0: "Legitimate", 1: "Phishing"})
|
|
|
|
| 97 |
# Get top prediction
|
| 98 |
max_label, max_prob = max(results.items(), key=lambda x: x[1])
|
| 99 |
|
| 100 |
+
# Risk levels with higher threshold (85%)
|
| 101 |
+
if "phishing" in max_label.lower() or "suspicious" in max_labelkilled by server before completion, please rerun or continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|