File size: 1,741 Bytes
8450003
50944f0
8450003
 
924d4e1
50944f0
 
924d4e1
b89ac72
924d4e1
8450003
 
 
 
b89ac72
50944f0
 
 
 
 
924d4e1
8450003
924d4e1
8450003
fe81869
8450003
fe81869
50944f0
8450003
50944f0
8450003
50944f0
 
b89ac72
924d4e1
 
8450003
 
 
924d4e1
8450003
924d4e1
50944f0
 
 
 
 
924d4e1
8450003
 
924d4e1
8450003
924d4e1
8450003
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
import re
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from collections import Counter

# 1. Load and clean data
# Keep only the label column (v1) and the message column (v2), give them
# descriptive names, then encode the label as 0 for ham and 1 for spam.
df = (
    pd.read_csv("spam.csv", encoding="latin1")
    .loc[:, ["v1", "v2"]]
    .rename(columns={"v1": "label", "v2": "text"})
)
df["label"] = df["label"].map({"ham": 0, "spam": 1})

# 2. Clean text
# Compiled once at import time; matches every run of non-word characters.
_NON_WORD_RE = re.compile(r"\W+")


def clean_text(text):
    """Normalise a message for vectorisation.

    Lower-cases the text, collapses every run of non-word characters
    (anything other than letters, digits and underscore) into a single
    space, and trims leading/trailing whitespace.

    Args:
        text: The raw message. ``None`` and float NaN (a missing cell) are
            treated as an empty message; any other non-string value is
            converted with ``str()`` first.

    Returns:
        The cleaned string, possibly empty.
    """
    # `text != text` is true only for NaN, the marker for a missing value.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)
    return _NON_WORD_RE.sub(" ", text.lower()).strip()

# Normalise every stored message with the same cleaner used at prediction time.
df["text"] = df["text"].apply(clean_text)

# 3. Split data
# Hold out 20% of the rows, stratified on the label so both parts keep the
# same class ratio; the fixed seed makes the split reproducible.
messages, targets = df["text"], df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    targets,
    test_size=0.2,
    random_state=42,
    stratify=targets,
)

# 4. Build and train model
# TF-IDF over unigrams and bigrams feeds a class-balanced logistic regression.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english")
classifier = LogisticRegression(max_iter=1000, class_weight="balanced")
model = make_pipeline(vectorizer, classifier)
model.fit(X_train, y_train)

# 5. Evaluate
# Score the fitted pipeline on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Validation Accuracy: {accuracy:.2%}")

# 6. Gradio prediction function
def predict_spam(message):
    """Classify one SMS message and report the model's confidence.

    Args:
        message: Raw text entered by the user; ``None`` is treated as empty.

    Returns:
        A display string holding the predicted label and the probability
        the model assigns to that label, or a short prompt when no text is
        left to classify after cleaning.
    """
    cleaned = clean_text(message or "")
    if not cleaned:
        # No tokens survive cleaning, so a prediction would carry no
        # evidence from the message itself.
        return "⚠️ Please enter a message to classify."

    # Run the pipeline once; the columns of predict_proba are ordered like
    # model.classes_, so look the label up by position instead of assuming
    # that the label value doubles as a column index.
    probabilities = model.predict_proba([cleaned])[0]
    best = probabilities.argmax()
    pred = model.classes_[best]
    prob = probabilities[best]

    label = "🚫 Spam" if pred == 1 else "📩 Not Spam (Ham)"
    return f"{label} (Confidence: {prob:.2%})"

# 7. Gradio UI
# Declare the input and output widgets, wire them to the prediction function,
# then start the app.
message_box = gr.Textbox(lines=4, label="Enter SMS Message")
result_box = gr.Text(label="Prediction")
demo = gr.Interface(
    fn=predict_spam,
    inputs=message_box,
    outputs=result_box,
    title="SMS Spam Detector",
    description=f"Detects spam in SMS messages. Trained on uploaded CSV (Accuracy: {accuracy:.2%}).",
)
demo.launch()