import gradio as gr
from transformers import pipeline
import pandas as pd

# Load dataset from Hugging Face Hub
dataset_path = "hf://datasets/ucirvine/sms_spam/plain_text/train-00000-of-00001.parquet"
df = pd.read_parquet(dataset_path)

# Load a spam classification model
classifier = pipeline("text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")

def spam_detector(text):
    """Detect if a message is spam or not, with confidence filtering."""
    result = classifier(text)
    
    # Debugging: Print the raw output from the classifier
    print("Model Output:", result)

    # Extract label and confidence score
    label = result[0]['label'].lower().strip()
    confidence = result[0]['score']  # Confidence score (0 to 1)

    # Some checkpoints report "spam"/"ham", others the generic "LABEL_0"/"LABEL_1"
    # (with LABEL_1 meaning spam); accept either form.
    is_spam = label in ("spam", "label_1")

    # Confidence threshold (adjustable; 0.5 is the neutral default, raised here to 0.7)
    threshold = 0.7

    # Flag as spam only when the model is confident enough
    if is_spam and confidence >= threshold:
        return f"Spam (Confidence: {confidence:.2f})"
    else:
        return f"Not Spam (Confidence: {confidence:.2f})"

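# Optional sanity check: run the detector over a few rows of the loaded dataset and
# compare against the ground-truth labels. This sketch assumes the sms_spam parquet
# exposes "sms" (message text) and "label" (0 = ham, 1 = spam) columns; adjust the
# names if the schema differs.
def evaluate_sample(n=5):
    """Print predictions for the first n dataset messages next to their labels."""
    for _, row in df.head(n).iterrows():
        expected = "Spam" if row["label"] == 1 else "Not Spam"
        predicted = spam_detector(row["sms"])
        print(f"expected={expected:<8} predicted={predicted} text={row['sms'][:60]!r}")
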
# Create Gradio UI with enhanced styling
app = gr.Interface(
    fn=spam_detector,
    inputs=gr.Textbox(label="Enter a message", placeholder="Type your message here..."),
    outputs=gr.Textbox(label="Prediction"),
    title="AI-Powered Spam Detector",
    description="Enter a message to check if it's spam or not, using a fine-tuned BERT model.",
)

# Run the app
if __name__ == "__main__":
    print("Loaded dataset preview:")
    print(df.head())
    app.launch()