File size: 5,335 Bytes
e751950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import gradio as gr
import transformers
from transformers import pipeline
import tf_keras as keras
import pandas as pd
import tempfile
import os

# Load pre-trained spam classifier
spam_classifier = pipeline(
    "text-classification",
    model="mrm8488/bert-tiny-finetuned-sms-spam-detection"
)

def classify_batch(file):
    """Process uploaded CSV/TXT file with multiple emails"""
    try:
        results = []
        
        # Check if file exists
        if not file.name:
            raise gr.Error("No file uploaded")

        # --- CSV File Handling ---
        if file.name.endswith('.csv'):
            df = pd.read_csv(file)
            
            # Check for required email column
            if 'email' not in df.columns:
                raise gr.Error("CSV file must contain a column named 'email'")
                
            emails = df['email'].tolist()

        # --- Text File Handling ---
        elif file.name.endswith('.txt'):
            with open(file.name, 'r') as f:
                emails = f.readlines()
        
        # --- Unsupported Format ---
        else:
            raise gr.Error("Unsupported file format. Only CSV/TXT accepted")

        # Process emails (common for both formats)
        emails = emails[:100]  # Limit to 100 emails
        for email in emails:
            # Handle empty lines in text files
            if not email.strip():
                continue
                
            prediction = spam_classifier(email.strip())[0]
            results.append({
                "email": email.strip()[:50] + "...",
                "label": "SPAM" if prediction["label"] == "LABEL_1" else "HAM",
                "confidence": f"{prediction['score']:.4f}"
            })

        return pd.DataFrame(results)

    except gr.Error as e:
        raise e  # Show pop-up for expected errors
    except Exception as e:
        raise gr.Error(f"Processing error: {str(e)}")

def classify_text(text):
    result = spam_classifier(text)[0]
    return {
        "Spam": result["score"] if result["label"] == "LABEL_1" else 1 - result["score"],
        "Ham": result["score"] if result["label"] == "LABEL_0" else 1 - result["score"]
    }

with gr.Blocks(title="Spam Classifier Pro") as demo:
    gr.Markdown("# ๐Ÿ“ง Welcome to Spamedar!")
    
    
    with gr.Tab("โœ‰๏ธ Single Email"):
        gr.Interface(
            description="<h2>Copy your email to find out if it's a is Spam or Ham๐Ÿ‘‡<h2>",
            fn=classify_text,
            inputs=gr.Textbox(label="Input Email", lines=3),
            outputs=gr.Label(label="Classification"),
            examples=[
                ["Urgent: Verify your account details now!"],
                ["Hey, can we meet tomorrow to discuss the project?"],
                ["WINNER! You've been selected for a $1000 Walmart Gift Card!"],
                ["Your account needs verification. Click here to confirm your details."],
                ["Meeting rescheduled to Friday 2 PM"]
            ]
        )
    current_dir = os.getcwd()
    with gr.Tab("๐Ÿ“จ Multiple Emails"):
        gr.Markdown("## Upload email batch (CSV or TXT)")
        file_input = gr.File(label="Upload File", file_types=[".csv", ".txt"])
        clear_btn = gr.Button("Clear Selection", variant="secondary")
        output_table = gr.Dataframe(
            headers=["email", "label", "confidence"],
            datatype=["str", "str", "number"],
            interactive=False,
            label="Classification Results"
        )
        download_btn = gr.DownloadButton(label="Download Results")

        def process_file(file):
            """Process file and return (display_df, download_path)"""
            try:
                if file is None:
                    return pd.DataFrame(), None

                results_df = classify_batch(file)
                with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
                    results_df.to_csv(f.name, index=False)
                    return results_df, f.name
            except Exception as e:
                raise gr.Error(f"Error processing file: {str(e)}")

        def clear_selection():
            ###clear file input and results function
            return None, pd.DataFrame(), None
        
        file_input.upload(
            fn=process_file,
            inputs=file_input,
            outputs=[output_table, download_btn]  # Update both components
        )

        clear_btn.click(
            fn=clear_selection,
            outputs=[file_input, output_table, download_btn]
        )

        example_files= [
            os.path.join(os.getcwd(), "sample_emails.csv"),
            os.path.join(os.getcwd(), "batch_emails.txt"),
        ]
        if all(os.path.exists(f) for f in example_files):
            gr.Examples(
                examples=[[f] for f in example_files],
                inputs=file_input,
                outputs=[output_table, download_btn],
                fn=process_file,
                cache_examples=True,
                label="Click any example below to test:"
            )
            
        else:
            print("Warning: Example files missing. Place these in your project root:")
            print("- sample_emails.csv")
            print("- batch_emails.txt")
        
if __name__ == "__main__":
    demo.launch(share=True)