import gradio as gr
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from textblob import TextBlob
import os
from huggingface_hub import login
# Get the Hugging Face API token from the environment variable and log in only if it is set
hf_token = os.getenv("pasavectoi")
if hf_token:
    login(hf_token)
# Load the dataset from the local file
data = pd.read_csv('twitter_dataset.csv').head(1000)
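# The CSV is expected to provide at least the 'Text', 'Retweets', and 'Likes' columns used below.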
# Calculate sentiment polarity and popularity
data['Sentiment'] = data['Text'].apply(lambda x: TextBlob(x).sentiment.polarity)
data['Popularity'] = data['Retweets'] + data['Likes']
data['Popularity'] = (data['Popularity'] - data['Popularity'].mean()) / data['Popularity'].std()
data['Popularity'] = data['Popularity'] / data['Popularity'].abs().max()
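# After z-scoring and dividing by the maximum absolute value, Popularity lies in [-1, 1] with mean ~0,
# putting it on a scale comparable to TextBlob's sentiment polarity.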
# Load the fake news classification model
model_name = "hamzab/roberta-fake-news-classification"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Process tweets in batches to avoid memory issues
batch_size = 100
predictions = []
for i in range(0, len(data), batch_size):
    batch = data['Text'][i:i + batch_size].tolist()
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
data['Fake_News_Prediction'] = predictions
data['Credibility'] = data['Fake_News_Prediction'].apply(lambda x: 1 if x == 1 else -1)
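# Credibility is +1 for tweets the classifier assigns label 1 and -1 otherwise, so the
# fake-news prediction can raise or lower a tweet's visibility score in the function below.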
# Define the prediction and recommendation function
def predict_and_recommend(title, text, visibility_weight, sentiment_weight, popularity_weight):
    # Normalize the user-supplied weights so they sum to 1 (guard against all-zero sliders)
    total_weight = visibility_weight + sentiment_weight + popularity_weight
    if total_weight == 0:
        total_weight = 1.0
    visibility_weight /= total_weight
    sentiment_weight /= total_weight
    popularity_weight /= total_weight
    # Recompute the visibility score for every tweet with the user-defined weights
    # (the title/text inputs are accepted by the UI but are not used in the scoring below)
    data['User_Final_Visibility_Score'] = (
        data['Credibility'] * visibility_weight +
        data['Sentiment'] * sentiment_weight +
        data['Popularity'] * popularity_weight
    )
    # Take the 100 highest-scoring tweets and randomly sample 10 of them as recommendations
    top_100_data = data.nlargest(100, 'User_Final_Visibility_Score')
    recommended_data = top_100_data.sample(10)
    return recommended_data[['Text', 'User_Final_Visibility_Score']]
# Set up Gradio interface
iface = gr.Interface(
    fn=predict_and_recommend,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Text", lines=10),
        gr.Slider(0, 1, 0.5, label="Visibility Weight"),
        gr.Slider(0, 1, 0.3, label="Sentiment Weight"),
        gr.Slider(0, 1, 0.2, label="Popularity Weight")
    ],
    outputs="dataframe",
    title="Customizable Fake News Recommendation System",
    description="Adjust weights to receive customized tweet recommendations based on visibility, sentiment, and popularity."
)
iface.launch()
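# A minimal sketch of calling the scoring function directly (e.g. for a quick local check),
# assuming the dataset and model above loaded successfully; the weights here are arbitrary examples.
# print(predict_and_recommend("Example title", "Example tweet text", 0.5, 0.3, 0.2))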