|
import streamlit as st |
|
import torch |
|
import newspaper |
|
import json |
|
|
|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
# Identifier handed to ``from_pretrained`` for both the tokenizer and the model;
# kept in one place so the two can never point at different checkpoints.
MODEL_NAME = "sofzcc/distilbert-base-uncased-fake-news-checker"


@st.cache_resource
def _load_tokenizer_and_model():
    """Load the tokenizer and the sequence classifier once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the loaded objects keeps the weights from being
    re-read on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple built from ``MODEL_NAME``.
    """
    return (
        AutoTokenizer.from_pretrained(MODEL_NAME),
        AutoModelForSequenceClassification.from_pretrained(MODEL_NAME),
    )


tokenizer, model = _load_tokenizer_and_model()
|
|
|
def extract_news_text(url):
    """Download the article at *url* and return its body text.

    Args:
        url: Address of the news article. Surrounding whitespace (for example a
            trailing newline from a pasted value) is ignored.

    Returns:
        The article text extracted by ``newspaper``, as a ``str``.

    Raises:
        ValueError: If *url* is empty or blank, or if no text could be
            extracted from the downloaded page.
        Exception: Anything ``newspaper`` raises while downloading or parsing
            is propagated unchanged.
    """
    url = (url or "").strip()
    if not url:
        raise ValueError("No article URL was provided.")

    article = newspaper.Article(url=url, language='en')
    article.download()
    article.parse()

    # Only the body text is needed by callers, so no other fields are collected.
    text = str(article.text)
    if not text.strip():
        # Fail loudly rather than let an empty string reach the classifier.
        raise ValueError("No article text could be extracted from the URL.")
    return text
|
|
|
|
|
|
|
def predict_news(news_text):
    """Classify *news_text* and return ``"Real"`` or ``"Fake"``.

    Args:
        news_text: Article text to classify. It is tokenized with truncation
            to at most 512 tokens.

    Returns:
        ``"Real"`` when the highest-scoring class index is 1, otherwise
        ``"Fake"``.
    """
    inputs = tokenizer(news_text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # This module moves the model to CUDA when a GPU is available, so the
    # encoded tensors must be placed on the same device as the weights.
    target_device = next(model.parameters()).device
    inputs = {name: tensor.to(target_device) for name, tensor in inputs.items()}

    with torch.no_grad():
        logits = model(**inputs).logits

    predictions = torch.argmax(logits, dim=-1).item()
    return "Real" if predictions == 1 else "Fake"
|
|
|
|
|
# Run on a CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Place the model's weights on the selected device.
model.to(device)
|
|
|
|
|
def classify_text(text):
    """Run the classifier on *text* and report the label with its scores.

    Args:
        text: Article text to classify. It is tokenized with truncation to at
            most 512 tokens.

    Returns:
        A ``(predicted_label, probabilities)`` tuple. ``predicted_label`` is
        ``'False'`` for class index 0 and ``'True'`` for class index 1.
        ``probabilities`` is the softmax of the logits as a NumPy array
        (presumably shape ``(1, 2)`` for a single text -- TODO confirm).
    """
    class_names = ['False', 'True']

    # Tokenize, then move every encoded tensor onto the model's device.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: evaluation mode, no gradient tracking.
    model.eval()
    with torch.no_grad():
        logits = model(**encoded).logits

    scores = torch.softmax(logits, dim=-1)
    best_index = scores.argmax(dim=1).item()

    return class_names[best_index], scores.cpu().numpy()
|
|
|
|
|
|
|
st.title("Fake News Detector")

# Markdown shown inside the collapsible "Disclaimer" panel.
_DISCLAIMER_MARKDOWN = """

**Important Notice:**

This model was trained exclusively on news articles from Reuters. As a result, the model may be biased towards considering news from Reuters as "True" and may not accurately classify news from other sources.

**Usage Warning:**

- This model is intended for experimental and educational purposes only.
- We do not take any responsibility for the outcomes or decisions made based on the results provided by this model.
- The model should not be used for any critical or real-world applications, especially those that involve significant consequences or decision-making.
- Users are encouraged to apply their own judgment and consult multiple sources when evaluating the credibility of news.

**By using this model, you acknowledge and accept these terms and disclaimers.**
"""

disclaimer_panel = st.expander("Disclaimer")
with disclaimer_panel:
    st.markdown(_DISCLAIMER_MARKDOWN)
|
|
|
# --- Evaluate an article by URL -------------------------------------------
st.write("Enter a news article URL below to check if it's real or fake:")

news_url = st.text_area("News URL", height=100)

if st.button("Evaluate URL"):
    # A pasted URL often carries stray whitespace or a trailing newline.
    cleaned_url = news_url.strip()
    if cleaned_url:
        try:
            article_text = extract_news_text(cleaned_url)
            predicted_label, probabilities = classify_text(article_text)
        except Exception:
            # ``Exception`` rather than a bare ``except:`` so BaseException
            # subclasses (KeyboardInterrupt, SystemExit, ...) are not swallowed.
            st.write("It wasn't possible to fetch the article text. Enter the news article text below to check if it's real or fake.")
        else:
            st.write(f"The news article is predicted to be: **{predicted_label}**")
    else:
        st.write("Please enter some news URL to evaluate.")
|
|
|
# --- Evaluate pasted article text -----------------------------------------
st.write("Enter a news article text below to check if it's real or fake:")

news_text = st.text_area("News Text", height=300)

if st.button("Evaluate Text"):
    # Treat whitespace-only input the same as no input.
    if news_text.strip():
        try:
            predicted_label, probabilities = classify_text(news_text)
        except Exception:
            # ``Exception`` rather than a bare ``except:`` so BaseException
            # subclasses (KeyboardInterrupt, SystemExit, ...) are not swallowed.
            st.write("It wasn't possible to assess the article text.")
        else:
            st.write(f"The news article is predicted to be: **{predicted_label}**")
    else:
        # This branch is for the text box, so the prompt asks for text, not a URL.
        st.write("Please enter some news text to evaluate.")
|
|