raufjivad
committed on
Commit
·
e751950
1
Parent(s):
845072f
upload files
Browse files
app.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import transformers
|
3 |
+
from transformers import pipeline
|
4 |
+
import tf_keras as keras
|
5 |
+
import pandas as pd
|
6 |
+
import tempfile
|
7 |
+
import os
|
8 |
+
|
9 |
+
# Shared text-classification pipeline, created once when the module is
# imported and reused by the single-email and batch handlers.
_SPAM_MODEL_ID = "mrm8488/bert-tiny-finetuned-sms-spam-detection"
spam_classifier = pipeline(task="text-classification", model=_SPAM_MODEL_ID)
|
14 |
+
|
15 |
+
def classify_batch(file):
    """Classify every email found in an uploaded CSV or TXT file.

    CSV files must contain a column named ``email``; TXT files are read as
    one email per line. Only the first 100 entries are considered, and blank
    entries among them are skipped.

    Args:
        file: The uploaded file, either as a path string or as an object
            exposing the on-disk path through a ``name`` attribute.

    Returns:
        A pandas DataFrame with the columns ``email`` (a preview of at most
        50 characters, followed by "..." when the text was cut), ``label``
        ("SPAM" or "HAM") and ``confidence`` (the score formatted with four
        decimals). The columns are present even when no email was classified.

    Raises:
        gr.Error: If no file was supplied, the extension is not .csv/.txt,
            the CSV lacks an ``email`` column, or any other failure occurs
            while reading or classifying.
    """
    max_emails = 100
    preview_len = 50

    try:
        # Accept both a plain path and a file-like wrapper; None (no upload)
        # falls through to the explicit "No file uploaded" error.
        path = file if isinstance(file, str) else getattr(file, "name", None)
        if not path:
            raise gr.Error("No file uploaded")

        lowered = path.lower()  # extension check should not depend on case

        # --- CSV File Handling ---
        if lowered.endswith(".csv"):
            df = pd.read_csv(path)

            # Check for required email column
            if "email" not in df.columns:
                raise gr.Error("CSV file must contain a column named 'email'")

            # Missing cells come back as NaN (a float) and numeric cells as
            # numbers; normalise everything to str so .strip() is safe.
            emails = df["email"].fillna("").astype(str).tolist()

        # --- Text File Handling ---
        elif lowered.endswith(".txt"):
            # Explicit encoding keeps behaviour independent of the host
            # locale; undecodable bytes are replaced instead of aborting.
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                emails = f.readlines()

        # --- Unsupported Format ---
        else:
            raise gr.Error("Unsupported file format. Only CSV/TXT accepted")

        # Process emails (common for both formats)
        results = []
        for raw in emails[:max_emails]:
            text = raw.strip()
            if not text:
                continue  # blank line / empty cell

            # truncation=True asks the tokenizer to cut over-long inputs so a
            # single huge email does not fail the whole batch.
            # NOTE(review): assumes the tokenizer defines a max length - confirm.
            prediction = spam_classifier(text, truncation=True)[0]

            # Only add the ellipsis when the preview really was shortened.
            if len(text) > preview_len:
                preview = text[:preview_len] + "..."
            else:
                preview = text

            results.append({
                "email": preview,
                "label": "SPAM" if prediction["label"] == "LABEL_1" else "HAM",
                "confidence": f"{prediction['score']:.4f}",
            })

        return pd.DataFrame(results, columns=["email", "label", "confidence"])

    except gr.Error:
        raise  # Expected, user-facing errors pass through unchanged
    except Exception as e:
        raise gr.Error(f"Processing error: {str(e)}") from e
|
63 |
+
|
64 |
+
def classify_text(text):
    """Score a single email and return probabilities for both classes.

    The first prediction from ``spam_classifier`` supplies a label and a
    score. The class matching the label receives the score and the other
    class receives ``1 - score``.

    Returns:
        dict with the keys "Spam" and "Ham".
    """
    top = spam_classifier(text)[0]
    label = top["label"]
    score = top["score"]
    complement = 1 - score

    spam_score = score if label == "LABEL_1" else complement
    ham_score = score if label == "LABEL_0" else complement
    return {"Spam": spam_score, "Ham": ham_score}
|
70 |
+
|
71 |
+
# Two-tab UI: one tab classifies a single pasted email, the other classifies
# an uploaded CSV/TXT batch and offers the results as a downloadable CSV.
with gr.Blocks(title="Spam Classifier Pro") as demo:
    gr.Markdown("# 📧 Welcome to Spamedar!")

    with gr.Tab("✉️ Single Email"):
        gr.Interface(
            # Heading tag is now closed properly (was "<h2>...<h2>").
            description="<h2>Copy your email to find out if it's Spam or Ham👇</h2>",
            fn=classify_text,
            inputs=gr.Textbox(label="Input Email", lines=3),
            outputs=gr.Label(label="Classification"),
            examples=[
                ["Urgent: Verify your account details now!"],
                ["Hey, can we meet tomorrow to discuss the project?"],
                ["WINNER! You've been selected for a $1000 Walmart Gift Card!"],
                ["Your account needs verification. Click here to confirm your details."],
                ["Meeting rescheduled to Friday 2 PM"],
            ],
        )

    # Working directory in which the optional example files are looked up.
    current_dir = os.getcwd()

    with gr.Tab("📨 Multiple Emails"):
        gr.Markdown("## Upload email batch (CSV or TXT)")
        file_input = gr.File(label="Upload File", file_types=[".csv", ".txt"])
        clear_btn = gr.Button("Clear Selection", variant="secondary")
        output_table = gr.Dataframe(
            headers=["email", "label", "confidence"],
            datatype=["str", "str", "number"],
            interactive=False,
            label="Classification Results",
        )
        download_btn = gr.DownloadButton(label="Download Results")

        def process_file(file):
            """Classify an uploaded file; return (display_df, download_path).

            Returns an empty DataFrame and ``None`` when no file is given.
            Otherwise the results from ``classify_batch`` are also written
            to a temporary CSV whose path is returned for the download button.

            Raises:
                gr.Error: If classification or writing the CSV fails.
            """
            if file is None:
                return pd.DataFrame(), None

            try:
                results_df = classify_batch(file)

                # delete=False keeps the file on disk after the handle is
                # closed, so the returned path stays valid for downloading.
                with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
                    download_path = tmp.name
                # Write only after the handle is closed, so the same file is
                # never open twice.
                results_df.to_csv(download_path, index=False)
                return results_df, download_path
            except gr.Error:
                # Already a user-facing message; do not prefix it again.
                raise
            except Exception as e:
                raise gr.Error(f"Error processing file: {str(e)}") from e

        def clear_selection():
            """Reset the file input, the results table and the download button."""
            return None, pd.DataFrame(), None

        file_input.upload(
            fn=process_file,
            inputs=file_input,
            outputs=[output_table, download_btn],  # Update both components
        )

        clear_btn.click(
            fn=clear_selection,
            outputs=[file_input, output_table, download_btn],
        )

        example_files = [
            os.path.join(current_dir, "sample_emails.csv"),
            os.path.join(current_dir, "batch_emails.txt"),
        ]
        # Only offer clickable examples when both sample files are present.
        if all(os.path.exists(f) for f in example_files):
            gr.Examples(
                examples=[[f] for f in example_files],
                inputs=file_input,
                outputs=[output_table, download_btn],
                fn=process_file,
                cache_examples=True,
                label="Click any example below to test:",
            )
        else:
            print("Warning: Example files missing. Place these in your project root:")
            print("- sample_emails.csv")
            print("- batch_emails.txt")

if __name__ == "__main__":
    demo.launch(share=True)
|