Spaces:

resolverkatla
/

Spam_Detector

Sleeping

App Files Files Community

resolverkatla committed on May 26

Commit

50944f0

verified ·

1 Parent(s): 45c0da1

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -13

app.py CHANGED Viewed

@@ -1,38 +1,54 @@
 import gradio as gr
 from datasets import load_dataset
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.naive_bayes import MultinomialNB
 from sklearn.pipeline import make_pipeline
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 # 1. Load dataset
 dataset = load_dataset("ucirvine/sms_spam", split="train")  # only the "train" split is requested; the hold-out set is carved from it below
 texts = dataset["sms"]  # message column, passed to the vectorizer without any preprocessing in this version
-labels = [1 if label == "spam" else 0 for label in dataset["label"]]  # spam=1, ham=0. NOTE(review): if the "label" column holds integer class ids rather than strings, this comparison never matches and every label becomes 0 - verify
-# 2. Train/test split
-X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)  # 80/20 split with a fixed seed; no stratify= argument is passed
-# 3. Create model pipeline (TF-IDF + Naive Bayes)
-model = make_pipeline(TfidfVectorizer(), MultinomialNB())  # both steps use library defaults
 model.fit(X_train, y_train)  # module-level statement, so training runs whenever the script is loaded
-# 4. Accuracy for reference
 y_pred = model.predict(X_test)
 print("Validation Accuracy:", accuracy_score(y_test, y_pred))  # accuracy on the 20% hold-out, printed once at startup
-# 5. Gradio interface
 def predict_spam(message):  # callback passed to gr.Interface as fn; takes the message text and returns a display string
-    pred = model.predict([message])[0]  # wrap the single message in a list for predict, then take the only result
-    return "📩 Not Spam (Ham)" if pred == 0 else "🚫 Spam"  # 0 is the non-spam label; any other value is reported as spam
 iface = gr.Interface(  # UI definition: one multi-line textbox in, one text field out
     fn=predict_spam,  # function invoked with the textbox contents
     inputs=gr.Textbox(lines=4, label="Enter your SMS message"),
     outputs=gr.Text(label="Prediction"),
-    title="📬 SMS Spam Detector",
-    description="Classifies whether an SMS message is spam or not using a Naive Bayes model."
 )
 if __name__ == "__main__":

 import gradio as gr
+import re
 from datasets import load_dataset
 from sklearn.pipeline import make_pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 # 1. Load dataset
 dataset = load_dataset("ucirvine/sms_spam", split="train")  # only the "train" split is requested; the hold-out set is carved from it below
 texts = dataset["sms"]  # message column; clean_text calls .lower() on each entry, so these are expected to be str
+labels = [1 if label == "spam" else 0 for label in dataset["label"]]  # spam=1, anything else=0. NOTE(review): if the "label" column holds integer class ids rather than strings, this comparison never matches and every label becomes 0 - verify
+# 2. Clean text
+def clean_text(text):  # normalise one message string before vectorisation; returns the cleaned string
+    text = text.lower()  # lowercase so differently-cased spellings collapse to one token
+    text = re.sub(r"\W+", " ", text)  # each run of non-word characters (punctuation, whitespace, symbols) becomes a single space; underscores are word characters and are kept
+    return text.strip()  # remove the leading/trailing space the substitution can leave behind
+texts_cleaned = [clean_text(t) for t in texts]  # the model is fitted on cleaned text, so predict_spam applies clean_text to its input as well
+# 3. Train/test split
+X_train, X_test, y_train, y_test = train_test_split(texts_cleaned, labels, test_size=0.2, random_state=42)  # 80/20 split with a fixed seed; no stratify= argument is passed
+# 4. Build model: TF-IDF + Logistic Regression
+model = make_pipeline(
+    TfidfVectorizer(ngram_range=(1, 2), stop_words="english", max_df=0.9),  # unigram + bigram features, built-in English stop-word list, terms in more than 90% of documents are dropped
+    LogisticRegression(max_iter=1000, class_weight="balanced")  # class weights are adjusted inversely to class frequency; iteration cap raised to 1000
+)
 model.fit(X_train, y_train)  # module-level statement, so training runs whenever the script is loaded
+# 5. Show validation accuracy
 y_pred = model.predict(X_test)
 print("Validation Accuracy:", accuracy_score(y_test, y_pred))  # accuracy on the 20% hold-out, printed once at startup
+# 6. Prediction function
 def predict_spam(message):  # callback passed to gr.Interface as fn; takes the message text and returns a label plus confidence string
+    cleaned = clean_text(message)  # same preprocessing that was applied to the training texts
+    pred = model.predict([cleaned])[0]  # wrap the single message in a list for predict, then take the only result
+    prob = model.predict_proba([cleaned])[0][pred]  # probability of the predicted class; indexing by pred relies on the fitted classes being 0 and 1 in that order - NOTE(review): confirm against model.classes_
+    label = "🚫 Spam" if pred == 1 else "📩 Not Spam (Ham)"  # 1 is the spam label; any other value is reported as ham
+    return f"{label} (Confidence: {prob:.2%})"  # prob is rendered as a percentage with two decimal places
+# 7. Gradio UI
 iface = gr.Interface(  # UI definition: one multi-line textbox in, one text field out
     fn=predict_spam,  # function invoked with the textbox contents
     inputs=gr.Textbox(lines=4, label="Enter your SMS message"),
     outputs=gr.Text(label="Prediction"),
+    title="📬 Improved SMS Spam Detector",
+    description="Detects spam in SMS messages using Logistic Regression with TF-IDF bi-grams. Now with higher accuracy!"  # NOTE(review): the script only prints the current model's hold-out accuracy; nothing here compares it with the previous model
 )
 if __name__ == "__main__":