import gradio as gr
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from textblob import TextBlob
import os
from huggingface_hub import login
# Get the Hugging Face API token from the environment variable and log in only if it is set
hf_token = os.getenv("pasavectoi")
if hf_token:
    login(hf_token)
# Load the dataset from the local file
data = pd.read_csv('twitter_dataset.csv').head(1000)
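# The CSV is expected to provide at least the 'Text', 'Retweets', and 'Likes' columns used below.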
# Calculate sentiment polarity and popularity
data['Sentiment'] = data['Text'].apply(lambda x: TextBlob(x).sentiment.polarity)
data['Popularity'] = data['Retweets'] + data['Likes']
data['Popularity'] = (data['Popularity'] - data['Popularity'].mean()) / data['Popularity'].std()
data['Popularity'] = data['Popularity'] / data['Popularity'].abs().max()
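# After z-scoring and dividing by the maximum absolute value, Popularity lies in [-1, 1] with mean ~0,
# putting it on a scale comparable to TextBlob's sentiment polarity.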
# Load the fake news classification model
model_name = "hamzab/roberta-fake-news-classification"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Process tweets in batches to avoid memory issues
batch_size = 100
predictions = []
for i in range(0, len(data), batch_size):
    batch = data['Text'][i:i + batch_size].tolist()
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
data['Fake_News_Prediction'] = predictions
data['Credibility'] = data['Fake_News_Prediction'].apply(lambda x: 1 if x == 1 else -1)
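# Credibility is +1 for tweets the classifier assigns label 1 and -1 otherwise, so the
# fake-news prediction can raise or lower a tweet's visibility score in the function below.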
# Define the prediction and recommendation function
def predict_and_recommend(title, text, visibility_weight, sentiment_weight, popularity_weight):
    # Normalize the user-supplied weights so they sum to 1 (guard against all-zero sliders)
    total_weight = visibility_weight + sentiment_weight + popularity_weight
    if total_weight == 0:
        total_weight = 1.0
    visibility_weight /= total_weight
    sentiment_weight /= total_weight
    popularity_weight /= total_weight
    # Recompute the visibility score for every tweet with the user-defined weights
    # (the title/text inputs are accepted by the UI but are not used in the scoring below)
    data['User_Final_Visibility_Score'] = (
        data['Credibility'] * visibility_weight +
        data['Sentiment'] * sentiment_weight +
        data['Popularity'] * popularity_weight
    )
    # Take the 100 highest-scoring tweets and randomly sample 10 of them as recommendations
    top_100_data = data.nlargest(100, 'User_Final_Visibility_Score')
    recommended_data = top_100_data.sample(10)
    return recommended_data[['Text', 'User_Final_Visibility_Score']]
# Set up Gradio interface
iface = gr.Interface(
    fn=predict_and_recommend,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Text", lines=10),
        gr.Slider(0, 1, 0.5, label="Visibility Weight"),
        gr.Slider(0, 1, 0.3, label="Sentiment Weight"),
        gr.Slider(0, 1, 0.2, label="Popularity Weight")
    ],
    outputs="dataframe",
    title="Customizable Fake News Recommendation System",
    description="Adjust weights to receive customized tweet recommendations based on visibility, sentiment, and popularity."
)
iface.launch()
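# A minimal sketch of calling the scoring function directly (e.g. for a quick local check),
# assuming the dataset and model above loaded successfully; the weights here are arbitrary examples.
# print(predict_and_recommend("Example title", "Example tweet text", 0.5, 0.3, 0.2))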