File size: 4,286 Bytes
544f27d
 
d5fbc7e
 
544f27d
d0882dd
 
 
 
 
544f27d
d5fbc7e
 
8ab0dc9
 
d5fbc7e
 
 
 
 
 
 
 
 
 
8ab0dc9
 
544f27d
 
 
 
 
 
 
 
 
d4d4d4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544f27d
 
 
d4d4d4d
57adcc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4d4d4d
8ab0dc9
544f27d
8ab0dc9
544f27d
57adcc6
 
8ab0dc9
d4d4d4d
 
 
 
 
57adcc6
 
 
 
 
 
 
 
 
 
 
 
 
544f27d
8ab0dc9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import streamlit as st
import torch
import newspaper
import json

# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

@st.cache_resource
def _load_classifier(model_id="sofzcc/distilbert-base-uncased-fake-news-checker"):
    """Load the tokenizer and sequence-classification model for *model_id*.

    Streamlit re-executes the whole script on each widget interaction, so
    the loaded objects are cached as a shared resource instead of being
    re-instantiated on every rerun.

    Args:
        model_id: Hub identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_id),
        AutoModelForSequenceClassification.from_pretrained(model_id),
    )


tokenizer, model = _load_classifier()

def extract_news_text(url):
    """Download the article at *url* and return its body text.

    Args:
        url: Address of the news article. Surrounding whitespace (for
            example a trailing newline from a text box) is ignored.

    Returns:
        The extracted article text as a ``str``.

    Raises:
        ValueError: If the page was parsed but yielded no text, so callers
            do not go on to classify an empty string.

    Errors raised by ``newspaper`` while downloading or parsing propagate
    unchanged.
    """
    article = newspaper.Article(url=url.strip(), language='en')
    article.download()
    article.parse()

    text = str(article.text)
    if not text.strip():
        raise ValueError("No article text could be extracted from the given URL.")
    return text


# Function to predict if news is real or fake
def predict_news(news_text):
    """Classify *news_text* and return ``"Real"`` or ``"Fake"``.

    The text is tokenized (truncated to at most 512 tokens) and passed
    through the module-level ``model`` without gradient tracking. The
    arg-max class index is mapped to a label: index 1 gives ``"Real"``,
    any other index gives ``"Fake"``.

    Args:
        news_text: The article text to classify.

    Returns:
        ``"Real"`` or ``"Fake"``.
    """
    inputs = tokenizer(news_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # The model is moved to the selected device after loading, so the inputs
    # must live on that same device or the forward pass fails on GPU hosts.
    target_device = next(model.parameters()).device
    inputs = {key: value.to(target_device) for key, value in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1).item()
    return "Real" if predictions == 1 else "Fake"

# Select the compute device (CUDA when torch reports one, CPU otherwise)
# and place the model on it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Define text classification function
def classify_text(text):
    """Classify *text* and return the predicted label with class probabilities.

    The text is tokenized (truncated to at most 512 tokens), moved to the
    module-level ``device`` and run through ``model`` in evaluation mode
    with gradient tracking disabled.

    Args:
        text: The text to classify.

    Returns:
        A ``(predicted_label, probabilities)`` tuple. ``predicted_label`` is
        ``'False'`` for class index 0 and ``'True'`` for class index 1;
        ``probabilities`` is the softmax of the logits as a NumPy array.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)

    # Evaluation mode, no gradients: inference only.
    model.eval()
    with torch.no_grad():
        result = model(**on_device)

    # Normalise the logits over the class axis.
    class_probs = torch.softmax(result.logits, dim=-1)

    # Index of the most probable class selects the label.
    winner = torch.argmax(class_probs, dim=1).item()
    predicted_label = ('False', 'True')[winner]

    return predicted_label, class_probs.cpu().numpy()


# Streamlit App: page title
st.title("Fake News Detector")

# Collapsible disclaimer; the markdown is written straight into the
# container returned by st.expander.
st.expander("Disclaimer").markdown("""
    
    **Important Notice:**
    
    This model was trained exclusively on news articles from Reuters. As a result, the model may be biased towards considering news from Reuters as "True" and may not accurately classify news from other sources. 
    
    **Usage Warning:**
    
    - This model is intended for experimental and educational purposes only.
    - We do not take any responsibility for the outcomes or decisions made based on the results provided by this model.
    - The model should not be used for any critical or real-world applications, especially those that involve significant consequences or decision-making.
    - Users are encouraged to apply their own judgment and consult multiple sources when evaluating the credibility of news.
    
    **By using this model, you acknowledge and accept these terms and disclaimers.**
    """)

# --- Evaluate an article by URL ---
st.write("Enter a news article URL below to check if it's real or fake:")

news_url = st.text_area("News URL", height=100)


if st.button("Evaluate URL"):
    # A text area can carry stray spaces or newlines; whitespace-only input
    # counts as "nothing entered".
    cleaned_url = news_url.strip()
    if cleaned_url:
        try:
            article_text = extract_news_text(cleaned_url)
            predicted_label, _ = classify_text(article_text)
        except Exception:
            # Best-effort: any failure while fetching or classifying sends
            # the user to the manual text box below. `Exception` (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            st.write("It wasn't possible to fetch the article text. Enter the news article text below to check if it's real or fake.")
        else:
            st.write(f"The news article is predicted to be: **{predicted_label}**")
    else:
        st.write("Please enter some news URL to evaluate.")

# --- Evaluate pasted article text ---
st.write("Enter a news article text below to check if it's real or fake:")
news_text = st.text_area("News Text", height=300)
if st.button("Evaluate Text"):
    # Whitespace-only input counts as "nothing entered".
    cleaned_text = news_text.strip()
    if cleaned_text:
        try:
            predicted_label, _ = classify_text(cleaned_text)
        except Exception:
            # Best-effort: report the failure instead of crashing the app.
            # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate.
            st.write("It wasn't possible to assess the article text.")
        else:
            st.write(f"The news article is predicted to be: **{predicted_label}**")
    else:
        # This branch belongs to the text box, so ask for text, not a URL.
        st.write("Please enter some news text to evaluate.")