Eason918 committed on
Commit
1518e91
·
verified ·
1 Parent(s): 6088071

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

import streamlit as st
from transformers import pipeline

# ---------------- CONFIG ----------------
# Streamlit re-executes this whole script on every user interaction. Without
# caching, all three Hugging Face pipelines would be re-instantiated (and
# potentially re-downloaded) on each rerun of the app. st.cache_resource
# ensures they are built exactly once per server process.
@st.cache_resource
def _load_pipelines():
    """Load the email classifier and the two URL classifiers once.

    Returns:
        tuple: (email_clf, url_clf_a, url_clf_b) text-classification pipelines.
    """
    email_clf = pipeline("text-classification", model="ElSlay/BERT-Phishing-Email-Model")
    # use_fast=False — presumably this checkpoint lacks fast-tokenizer files;
    # TODO(review): confirm against the model repo.
    url_clf_a = pipeline("text-classification", model="Eason918/malicious-url-detector-v2", use_fast=False)
    url_clf_b = pipeline("text-classification", model="r3ddkahili/final-complete-malicious-url-model")
    return email_clf, url_clf_a, url_clf_b


# Module-level names kept so the rest of the script is unchanged.
pipe1, pipe2, pipe3 = _load_pipelines()
10
+
11
# Label normalization
def normalize_label(label):
    """Map a raw classifier label to a canonical category name.

    ``LABEL_0`` is treated as benign; every other label is malicious.
    """
    if label == "LABEL_0":
        return "benign"
    return "malicious"
14
+
15
# Weighted Ensemble Calculation (only pipeline2 and 3)
def calculate_weighted_prediction(label2, score2, label3, score3):
    """Combine two URL-classifier verdicts into one weighted verdict.

    Pipeline 2 contributes 0.3 and pipeline 3 contributes 0.7 of the
    final confidence mass; each pipeline's confidence is credited to the
    normalized category ("benign"/"malicious") it predicted.

    Returns:
        tuple: (winning_category, accumulated_weighted_score).
    """
    totals = {"benign": 0.0, "malicious": 0.0}
    for raw_label, confidence, weight in ((label2, score2, 0.3), (label3, score3, 0.7)):
        totals[normalize_label(raw_label)] += weight * confidence
    winner = max(totals, key=totals.get)
    return winner, totals[winner]
24
+
25
# Extract URLs
def extract_urls(text):
    """Return every ``http(s)://`` or ``www.``-prefixed URL found in *text*.

    Matches run until the next whitespace character, in order of appearance.
    """
    pattern = re.compile(r'(https?://[^\s]+|www\.[^\s]+)')
    return pattern.findall(text)
29
+
30
# ---------------- UI START ----------------
st.set_page_config(page_title="📩 Email Malicious Detector", layout="wide")
st.markdown("<h1 style='text-align: center;'>📩 Malicious Email Detection App</h1>", unsafe_allow_html=True)

st.markdown("### ✉️ Enter your email content:")
email_text = st.text_area("Paste your email content here:", height=200)

if st.button("🚨 Scan Email & Analyze URL"):
    if not email_text.strip():
        st.warning("⚠️ Please input some email content.")
    else:
        # Stage 1: classify the email body itself.
        email_pred = pipe1(email_text)[0]
        if normalize_label(email_pred["label"]) == "benign":
            st.markdown("## 🛡️ **Prediction Result:**")
            st.success(f"✅ BENIGN EMAIL CONTENT (Confidence Score: {email_pred['score']:.2%})")
        else:
            # Stage 2: the email looks malicious — inspect an embedded URL.
            found_urls = extract_urls(email_text)
            if not found_urls:
                st.warning("⚠️ Email content is malicious, but no URL found for further analysis.")
            else:
                # NOTE(review): only the first extracted URL is scored even
                # though the result messages say "URLs" — confirm intent.
                first_url = found_urls[0]
                url_pred_a = pipe2(first_url)[0]
                url_pred_b = pipe3(first_url)[0]
                verdict, confidence = calculate_weighted_prediction(
                    url_pred_a["label"], url_pred_a["score"],
                    url_pred_b["label"], url_pred_b["score"],
                )

                st.markdown("## 🛡️ **Prediction Result:**")
                if confidence < 0.6:
                    st.warning(f"🤔 URLs in email content are UNCERTAIN - Confidence too low ({confidence:.2%}). Please review manually.")
                elif verdict == "benign":
                    st.success(f"✅ URLs in email content are BENIGN (Confidence Score: {confidence:.2%})")
                else:
                    st.error(f"⚠️ URLs in email content are MALICIOUS (Confidence Score: {confidence:.2%})")