Spaces:

YixuanWang
/

interactive_recommedation

Sleeping

App Files Files Community

YixuanWang committed on Dec 1, 2024

Commit

c4ee5b7

verified ·

1 Parent(s): 583a378

Upload 3 files

Browse files

Files changed (3) hide show

app.py +232 -0
requirements.txt +6 -0
twitter_dataset.csv +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,232 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from textblob import TextBlob
+from typing import List, Dict, Tuple
+from dataclasses import dataclass
+from pathlib import Path
+import logging
+# Configure the root logger at import time so INFO-level (and above) records are emitted.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger used by the classes and functions defined below.
+logger = logging.getLogger(__name__)
+@dataclass
+class RecommendationWeights:
+    """Relative importance of the three ranking signals.
+
+    Values are expected to be non-negative; RecommendationSystem rescales
+    them to sum to 1 before combining the per-tweet metric columns.
+    """
+    # Weight applied to the 'Credibility' column. Despite the field name, the
+    # UI labels this slider "Credibility Weight".
+    visibility: float
+    # Weight applied to the 'Sentiment' column (TextBlob polarity).
+    sentiment: float
+    # Weight applied to the 'Popularity' column (normalized likes + retweets).
+    popularity: float
+class TweetPreprocessor:
+    def __init__(self, data_path: Path):
+        self.data = self._load_data(data_path)
+        self.model_name = "hamzab/roberta-fake-news-classification"
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model, self.tokenizer = self._load_model()
+    def _load_model(self):
+        tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        model = AutoModelForSequenceClassification.from_pretrained(self.model_name).to(self.device)
+        return model, tokenizer
+    @staticmethod
+    def _load_data(data_path: Path) -> pd.DataFrame:
+        try:
+            data = pd.read_csv(data_path)
+            required_columns = {'Text', 'Retweets', 'Likes'}
+            if not required_columns.issubset(data.columns):
+                raise ValueError(f"Missing required columns: {required_columns - set(data.columns)}")
+            return data
+        except Exception as e:
+            logger.error(f"Error loading data: {e}")
+            raise
+    def calculate_metrics(self) -> pd.DataFrame:
+        # Calculate sentiment
+        self.data['Sentiment'] = self.data['Text'].apply(lambda x: TextBlob(x).sentiment.polarity)
+        # Calculate popularity
+        self.data['Popularity'] = self.data['Retweets'] + self.data['Likes']
+        self.data['Popularity'] = (self.data['Popularity'] - self.data['Popularity'].mean()) / self.data['Popularity'].std()
+        self.data['Popularity'] = self.data['Popularity'] / self.data['Popularity'].abs().max()
+        # Calculate credibility using fake news model
+        batch_size = 100
+        predictions = []
+        for i in range(0, len(self.data), batch_size):
+            batch = self.data['Text'][i:i + batch_size].tolist()
+            inputs = self.tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=128)
+            inputs = {key: val.to(self.device) for key, val in inputs.items()}
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
+        self.data['Credibility'] = [1 if pred == 1 else -1 for pred in predictions]
+        return self.data
+class RecommendationSystem:
+    def __init__(self, data_path: Path):
+        self.preprocessor = TweetPreprocessor(data_path)
+        self.data = None
+        self.setup_system()
+    def setup_system(self):
+        self.data = self.preprocessor.calculate_metrics()
+    def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
+        if not self._validate_weights(weights):
+            return {"error": "Invalid weights provided"}
+        normalized_weights = self._normalize_weights(weights)
+        self.data['Final_Score'] = (
+            self.data['Credibility'] * normalized_weights.visibility +
+            self.data['Sentiment'] * normalized_weights.sentiment +
+            self.data['Popularity'] * normalized_weights.popularity
+        )
+        top_recommendations = (
+            self.data.nlargest(100, 'Final_Score')
+            .sample(num_recommendations)
+        )
+        return self._format_recommendations(top_recommendations)
+    def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
+        formatted_results = []
+        for _, row in recommendations.iterrows():
+            score_details = {
+                "score": f"{row['Final_Score']:.2f}",
+                "credibility": "Reliable" if row['Credibility'] > 0 else "Uncertain",
+                "sentiment": self._get_sentiment_label(row['Sentiment']),
+                "popularity": f"{row['Popularity']:.2f}",
+                "engagement": f"Likes {row['Likes']} · Retweets {row['Retweets']}"
+            }
+            formatted_results.append({
+                "text": row['Text'],
+                "scores": score_details
+            })
+        return {
+            "recommendations": formatted_results,
+            "score_explanation": self._get_score_explanation()
+        }
+    @staticmethod
+    def _get_sentiment_label(sentiment_score: float) -> str:
+        if sentiment_score > 0.3:
+            return "Positive"
+        elif sentiment_score < -0.3:
+            return "Negative"
+        return "Neutral"
+    @staticmethod
+    def _validate_weights(weights: RecommendationWeights) -> bool:
+        return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
+    @staticmethod
+    def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
+        total = weights.visibility + weights.sentiment + weights.popularity
+        if total == 0:
+            return RecommendationWeights(1/3, 1/3, 1/3)
+        return RecommendationWeights(
+            visibility=weights.visibility / total,
+            sentiment=weights.sentiment / total,
+            popularity=weights.popularity / total
+        )
+    @staticmethod
+    def _get_score_explanation() -> Dict[str, str]:
+        return {
+            "Credibility": "Content reliability assessment",
+            "Sentiment": "Text emotional analysis result",
+            "Popularity": "Score based on likes and retweets"
+        }
+def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
+    with gr.Blocks(theme=gr.themes.Soft()) as interface:
+        gr.Markdown("""
+        # Tweet Recommendation System
+        Adjust weights to get personalized recommendations
+        Note: To protect user privacy, some tweet content has been redacted or anonymized.
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                visibility_weight = gr.Slider(0, 1, 0.5, label="Credibility Weight", info="Adjust importance of content credibility")
+                sentiment_weight = gr.Slider(0, 1, 0.3, label="Sentiment Weight", info="Adjust importance of emotional tone")
+                popularity_weight = gr.Slider(0, 1, 0.2, label="Popularity Weight", info="Adjust importance of engagement metrics")
+                submit_btn = gr.Button("Get Recommendations", variant="primary")
+            with gr.Column(scale=2):
+                output_html = gr.HTML()
+        def format_recommendations(raw_recommendations):
+            html = '<div style="font-family: sans-serif;">'
+            html += '''
+            <div style="margin-bottom: 20px; padding: 15px; background-color: #1a1a1a; color: white; border-radius: 8px;">
+                <h3 style="margin-top: 0;">Score Guide</h3>
+                <ul style="margin: 0;">
+                    <li><strong>Credibility</strong>: Assessment of content reliability</li>
+                    <li><strong>Sentiment</strong>: Text emotional analysis (Positive/Negative/Neutral)</li>
+                    <li><strong>Popularity</strong>: Normalized score based on likes and retweets</li>
+                </ul>
+            </div>
+            '''
+            for i, rec in enumerate(raw_recommendations["recommendations"], 1):
+                scores = rec["scores"]
+                html += f'''
+                <div style="margin-bottom: 15px; padding: 15px; border: 1px solid #ddd; border-radius: 8px;">
+                    <div style="margin-bottom: 10px; font-size: 1.1em;">{rec["text"]}</div>
+                    <div style="display: flex; flex-wrap: wrap; gap: 10px; font-size: 0.9em;">
+                        <span style="padding: 3px 8px; background-color: #1976d2; color: white; border-radius: 4px;">
+                            Score: {scores["score"]}
+                        </span>
+                        <span style="padding: 3px 8px; background-color: #2e7d32; color: white; border-radius: 4px;">
+                            Credibility: {scores["credibility"]}
+                        </span>
+                        <span style="padding: 3px 8px; background-color: #ed6c02; color: white; border-radius: 4px;">
+                            Sentiment: {scores["sentiment"]}
+                        </span>
+                        <span style="padding: 3px 8px; background-color: #d32f2f; color: white; border-radius: 4px;">
+                            Popularity: {scores["popularity"]}
+                        </span>
+                        <span style="padding: 3px 8px; background-color: #7b1fa2; color: white; border-radius: 4px;">
+                            Engagement: {scores["engagement"]}
+                        </span>
+                    </div>
+                </div>
+                '''
+            html += '</div>'
+            return html
+        def get_recommendations_with_weights(v, s, p):
+            weights = RecommendationWeights(v, s, p)
+            return format_recommendations(recommendation_system.get_recommendations(weights))
+        submit_btn.click(
+            fn=get_recommendations_with_weights,
+            inputs=[visibility_weight, sentiment_weight, popularity_weight],
+            outputs=output_html
+        )
+    return interface
+def main():
+    try:
+        recommendation_system = RecommendationSystem(
+            data_path=Path('twitter_dataset.csv')
+        )
+        interface = create_gradio_interface(recommendation_system)
+        interface.launch()
+    except Exception as e:
+        logger.error(f"Application failed to start: {e}")
+        raise
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+transformers
+torch
+gradio
+pandas
+numpy
+textblob

twitter_dataset.csv ADDED Viewed

The diff for this file is too large to render. See raw diff