Spaces:

resolverkatla
/

Spam_Detector

Sleeping

App Files Files Community

resolverkatla committed on May 26

Commit

924d4e1

verified ·

1 Parent(s): 09617b2

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -41

app.py CHANGED Viewed

@@ -1,45 +1,39 @@
 import gradio as gr
-from transformers import pipeline
-import pandas as pd
-# Load dataset from Hugging Face Hub
-dataset_path = "hf://datasets/ucirvine/sms_spam/plain_text/train-00000-of-00001.parquet"
-df = pd.read_parquet(dataset_path)
-# Load a spam classification model
-classifier = pipeline("text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
-def spam_detector(text):
-    """Detect if a message is spam or not, with confidence filtering."""
-    result = classifier(text)
-    # Debugging: Print the raw output from the classifier
-    print("Model Output:", result)
-    # Extract label and confidence score
-    label = result[0]['label'].lower().strip()
-    confidence = result[0]['score']  # Confidence score (0 to 1)
-    # Confidence threshold (adjustable, 0.5 is standard)
-    threshold = 0.7
-    # Return based on confidence
-    if label == "spam" and confidence >= threshold:
-        return f"Spam (Confidence: {confidence:.2f})"
-    else:
-        return f"Not Spam (Confidence: {confidence:.2f})"
-# Create Gradio UI with enhanced styling
-app = gr.Interface(
-    fn=spam_detector,
-    inputs=gr.Textbox(label="Enter a message", placeholder="Type your message here..."),
-    outputs=gr.Textbox(label="Prediction"),
-    title="AI-Powered Spam Detector",
-    description="Enter a message to check if it's spam or not, using a fine-tuned BERT model.",
 )
-# Run the app
 if __name__ == "__main__":
-    print("Loaded dataset preview:")
-    print(df.head())
-    app.launch

 import gradio as gr
+from datasets import load_dataset
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+# 1. Load dataset (the "train" split of ucirvine/sms_spam from the Hugging Face Hub)
+dataset = load_dataset("ucirvine/sms_spam", split="train")
+texts = list(dataset["sms"])  # plain list of message strings for scikit-learn
+labels = [1 if label in (1, "spam") else 0 for label in dataset["label"]]  # spam=1, ham=0; the column is a ClassLabel that yields ints, so match the int as well as the name
+# 2. Train/test split (20% held out; fixed random_state keeps the split reproducible)
+X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)
+# 3. Create model pipeline (TF-IDF + Naive Bayes)
+model = make_pipeline(TfidfVectorizer(), MultinomialNB())
+model.fit(X_train, y_train)
+# 4. Accuracy on the held-out split, printed once at startup for reference
+y_pred = model.predict(X_test)
+print("Validation Accuracy:", accuracy_score(y_test, y_pred))
+# 5. Gradio interface
+def predict_spam(message):  # Gradio callback: takes the SMS text, returns the verdict string
+    is_spam = model.predict([message])[0] != 0  # predict() takes a batch, so wrap the single message; class 0 is ham
+    return "🚫 Spam" if is_spam else "📩 Not Spam (Ham)"
+iface = gr.Interface(
+    title="📬 SMS Spam Detector",
+    description="Classifies whether an SMS message is spam or not using a Naive Bayes model.",
+    fn=predict_spam,  # callback that turns the submitted text into the prediction string
+    inputs=gr.Textbox(lines=4, label="Enter your SMS message"),
+    outputs=gr.Text(label="Prediction"),
 )
 if __name__ == "__main__":
+    iface.launch(share=False)  # share=False: no public share link is requested