resolverkatla committed on
Commit
924d4e1
·
verified ·
1 Parent(s): 09617b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -41
app.py CHANGED
@@ -1,45 +1,39 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
- import pandas as pd
4
-
5
- # Load dataset from Hugging Face Hub
6
- dataset_path = "hf://datasets/ucirvine/sms_spam/plain_text/train-00000-of-00001.parquet"
7
- df = pd.read_parquet(dataset_path)
8
-
9
- # Load a spam classification model
10
- classifier = pipeline("text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
11
-
12
- def spam_detector(text):
13
- """Detect if a message is spam or not, with confidence filtering."""
14
- result = classifier(text)
15
-
16
- # Debugging: Print the raw output from the classifier
17
- print("Model Output:", result)
18
-
19
- # Extract label and confidence score
20
- label = result[0]['label'].lower().strip()
21
- confidence = result[0]['score'] # Confidence score (0 to 1)
22
-
23
- # Confidence threshold (adjustable, 0.5 is standard)
24
- threshold = 0.7
25
-
26
- # Return based on confidence
27
- if label == "spam" and confidence >= threshold:
28
- return f"Spam (Confidence: {confidence:.2f})"
29
- else:
30
- return f"Not Spam (Confidence: {confidence:.2f})"
31
-
32
- # Create Gradio UI with enhanced styling
33
- app = gr.Interface(
34
- fn=spam_detector,
35
- inputs=gr.Textbox(label="Enter a message", placeholder="Type your message here..."),
36
- outputs=gr.Textbox(label="Prediction"),
37
- title="AI-Powered Spam Detector",
38
- description="Enter a message to check if it's spam or not, using a fine-tuned BERT model.",
39
  )
40
 
41
- # Run the app
42
  if __name__ == "__main__":
43
- print("Loaded dataset preview:")
44
- print(df.head())
45
- app.launch
 
1
  import gradio as gr
2
+ from datasets import load_dataset
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.naive_bayes import MultinomialNB
5
+ from sklearn.pipeline import make_pipeline
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.metrics import accuracy_score
8
+
9
+ # 1. Load dataset
10
+ dataset = load_dataset("ucirvine/sms_spam", split="train")
11
+ texts = dataset["sms"]
12
+ labels = [1 if label == "spam" else 0 for label in dataset["label"]] # spam=1, ham=0
13
+
14
+ # 2. Train/test split
15
+ X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)
16
+
17
+ # 3. Create model pipeline (TF-IDF + Naive Bayes)
18
+ model = make_pipeline(TfidfVectorizer(), MultinomialNB())
19
+ model.fit(X_train, y_train)
20
+
21
+ # 4. Accuracy for reference
22
+ y_pred = model.predict(X_test)
23
+ print("Validation Accuracy:", accuracy_score(y_test, y_pred))
24
+
25
+ # 5. Gradio interface
26
+ def predict_spam(message):
27
+ pred = model.predict([message])[0]
28
+ return "📩 Not Spam (Ham)" if pred == 0 else "🚫 Spam"
29
+
30
+ iface = gr.Interface(
31
+ fn=predict_spam,
32
+ inputs=gr.Textbox(lines=4, label="Enter your SMS message"),
33
+ outputs=gr.Text(label="Prediction"),
34
+ title="📬 SMS Spam Detector",
35
+ description="Classifies whether an SMS message is spam or not using a Naive Bayes model."
 
 
 
36
  )
37
 
 
38
  if __name__ == "__main__":
39
+ iface.launch(share=False)