Spaces:
Sleeping
Sleeping
File size: 1,741 Bytes
8450003 50944f0 8450003 924d4e1 50944f0 924d4e1 b89ac72 924d4e1 8450003 b89ac72 50944f0 924d4e1 8450003 924d4e1 8450003 fe81869 8450003 fe81869 50944f0 8450003 50944f0 8450003 50944f0 b89ac72 924d4e1 8450003 924d4e1 8450003 924d4e1 50944f0 924d4e1 8450003 924d4e1 8450003 924d4e1 8450003 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import pandas as pd
import re
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from collections import Counter
# 1. Load the dataset, keep only the label/message columns, and encode labels
# The column selection and rename are chained so no intermediate frame is kept.
df = pd.read_csv("spam.csv", encoding="latin1")[["v1", "v2"]].rename(
    columns={"v1": "label", "v2": "text"}
)
# "ham" becomes 0 and "spam" becomes 1; any other value maps to NaN.
df["label"] = df["label"].map({"ham": 0, "spam": 1})
# 2. Clean text
def clean_text(text):
    """Return *text* lowercased, with each run of non-word characters
    replaced by a single space and surrounding whitespace removed."""
    return re.sub(r"\W+", " ", text.lower()).strip()
# Normalize every message with clean_text before vectorization.
df["text"] = df["text"].map(clean_text)
# 3. Split data: hold out 20% for evaluation, stratified on the label so both
# splits keep the same class proportions; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.2,
    random_state=42,
    stratify=df["label"],
)
# 4. Build and train model
# TF-IDF over unigrams and bigrams (English stop words removed) feeds a
# logistic regression with balanced class weights; fit() returns the pipeline
# itself, so construction and training are chained.
model = make_pipeline(
    TfidfVectorizer(stop_words="english", ngram_range=(1, 2)),
    LogisticRegression(class_weight="balanced", max_iter=1000),
).fit(X_train, y_train)
# 5. Evaluate on the held-out split
test_predictions = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=test_predictions)
print(f"Validation Accuracy: {accuracy:.2%}")
# 6. Gradio prediction function
def predict_spam(message):
    """Classify an SMS message and describe the result for display.

    Args:
        message: Raw message text from the UI. May be ``None`` or blank.

    Returns:
        A string with the predicted label and the model's probability for
        that label, or a short prompt when there is no text to classify.
    """
    # Guard first: None would crash in clean_text, and a blank message has no
    # text for the model to judge, so a "confidence" would be misleading.
    if message is None or not message.strip():
        return "Please enter a message to classify."

    cleaned = clean_text(message)
    # A single predict_proba call yields both the label and its confidence,
    # so the message is vectorized only once.
    probabilities = model.predict_proba([cleaned])[0]
    best = probabilities.argmax()
    # Map the winning column back through classes_ rather than assuming the
    # class label doubles as the column index.
    pred = model.classes_[best]
    prob = probabilities[best]
    label = "🚫 Spam" if pred == 1 else "📩 Not Spam (Ham)"
    return f"{label} (Confidence: {prob:.2%})"
# 7. Gradio UI: one multi-line text input wired to predict_spam, one text output
demo = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(lines=4, label="Enter SMS Message"),
    outputs=gr.Text(label="Prediction"),
    title="SMS Spam Detector",
    # The accuracy measured above is embedded in the page description.
    description=f"Detects spam in SMS messages. Trained on uploaded CSV (Accuracy: {accuracy:.2%}).",
)
demo.launch()
|