"""Tweet recommendation demo.

Loads a CSV of tweets, derives sentiment / popularity / credibility metrics,
and serves a Gradio UI that ranks tweets by a user-weighted combination of
those metrics.
"""
import html
import logging
from dataclasses import dataclass, fields
from pathlib import Path
from typing import Dict, List, Tuple

import gradio as gr
import numpy as np
import pandas as pd
from textblob import TextBlob

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@dataclass
class RecommendationWeights:
    """Relative importance of each metric when computing the final score.

    Weights do not need to sum to 1; they are normalized before use.
    """

    visibility: float  # multiplies the 'Credibility' column
    sentiment: float  # multiplies the 'Sentiment' column
    popularity: float  # multiplies the 'Popularity' column


class TweetPreprocessor:
    """Load the tweet CSV and derive the per-tweet metric columns."""

    def __init__(self, data_path: Path):
        """Read the CSV at *data_path*; raises if it cannot be loaded."""
        self.data = self._load_data(data_path)

    @staticmethod
    def _load_data(data_path: Path) -> pd.DataFrame:
        """Read the CSV and verify that the required columns are present.

        Raises:
            ValueError: if any of 'Text', 'Retweets', 'Likes' is missing.
            Exception: any error raised by ``pd.read_csv`` is logged and
                re-raised unchanged.
        """
        try:
            data = pd.read_csv(data_path)
            required_columns = {'Text', 'Retweets', 'Likes'}
            if not required_columns.issubset(data.columns):
                raise ValueError(
                    f"Missing required columns: {required_columns - set(data.columns)}"
                )
            return data
        except Exception as e:
            logger.error("Error loading data: %s", e)
            raise

    def calculate_metrics(self) -> pd.DataFrame:
        """Add 'Sentiment', 'Popularity' and 'Credibility' columns in place.

        Returns:
            The same DataFrame held in ``self.data``, with the new columns.
        """
        self.data['Sentiment'] = self.data['Text'].apply(self._get_sentiment)
        self.data['Popularity'] = self._normalize_popularity()
        # NOTE: credibility is drawn at random (30% -> 0, 70% -> 1), not
        # derived from the tweet, so it differs between runs.
        self.data['Credibility'] = np.random.choice(
            [0, 1], size=len(self.data), p=[0.3, 0.7]
        )
        return self.data

    @staticmethod
    def _get_sentiment(text: str) -> float:
        """Return the TextBlob polarity of *text*, or 0.0 if analysis fails."""
        try:
            # str() guards against non-string cells (e.g. NaN for empty text).
            return TextBlob(str(text)).sentiment.polarity
        except Exception as e:
            # Best effort: a single bad row must not abort the whole load.
            logger.warning("Error calculating sentiment: %s", e)
            return 0.0

    def _normalize_popularity(self) -> pd.Series:
        """Min-max scale (Retweets + Likes) into roughly [0, 1]."""
        popularity = self.data['Retweets'] + self.data['Likes']
        lowest = popularity.min()
        # The small epsilon avoids division by zero when all values are equal.
        return (popularity - lowest) / (popularity.max() - lowest + 1e-6)


class RecommendationSystem:
    """Rank tweets by a weighted sum of credibility, sentiment and popularity."""

    def __init__(self, data_path: Path):
        """Load the data at *data_path* and precompute the metric columns."""
        self.preprocessor = TweetPreprocessor(data_path)
        self.data = None
        self.setup_system()

    def setup_system(self):
        """Compute the metric columns and cache the resulting DataFrame."""
        self.data = self.preprocessor.calculate_metrics()

    def get_recommendations(
        self,
        weights: RecommendationWeights,
        num_recommendations: int = 10,
    ) -> Dict:
        """Return the top-scoring tweets for the given weights.

        Args:
            weights: non-negative metric weights; normalized to sum to 1.
            num_recommendations: maximum number of tweets to return.

        Returns:
            ``{"recommendations": [...], "score_explanation": {...}}`` on
            success, or ``{"error": "..."}`` when the weights are invalid.
        """
        if not self._validate_weights(weights):
            return {"error": "Invalid weights provided"}

        normalized_weights = self._normalize_weights(weights)
        final_score = (
            self.data['Credibility'] * normalized_weights.visibility
            + self.data['Sentiment'] * normalized_weights.sentiment
            + self.data['Popularity'] * normalized_weights.popularity
        )
        # Score on a per-call frame instead of writing into self.data, so
        # concurrent requests with different weights cannot overwrite each
        # other's scores.
        scored = self.data.assign(Final_Score=final_score)
        top_recommendations = scored.nlargest(num_recommendations, 'Final_Score')
        return self._format_recommendations(top_recommendations)

    def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
        """Convert the scored rows into display-ready dictionaries."""
        formatted_results = []
        for row in recommendations.to_dict("records"):
            score_details = {
                "总分": f"{row['Final_Score']:.2f}",
                "可信度": "可信" if row['Credibility'] > 0 else "存疑",
                "情感倾向": self._get_sentiment_label(row['Sentiment']),
                "热度": f"{row['Popularity']:.2f}",
                "互动数": f"点赞 {row['Likes']} · 转发 {row['Retweets']}",
            }
            formatted_results.append({
                "text": row['Text'],
                "scores": score_details,
            })
        return {
            "recommendations": formatted_results,
            "score_explanation": self._get_score_explanation(),
        }

    @staticmethod
    def _get_sentiment_label(sentiment_score: float) -> str:
        """Map a polarity score to a positive / negative / neutral label."""
        if sentiment_score > 0.3:
            return "积极"
        if sentiment_score < -0.3:
            return "消极"
        return "中性"

    @staticmethod
    def _validate_weights(weights: RecommendationWeights) -> bool:
        """Return True when every weight is non-negative."""
        return all(getattr(weights, f.name) >= 0 for f in fields(weights))

    @staticmethod
    def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
        """Scale the weights to sum to 1; all-zero weights become equal thirds."""
        total = weights.visibility + weights.sentiment + weights.popularity
        if total == 0:
            return RecommendationWeights(1 / 3, 1 / 3, 1 / 3)
        return RecommendationWeights(
            visibility=weights.visibility / total,
            sentiment=weights.sentiment / total,
            popularity=weights.popularity / total,
        )

    @staticmethod
    def _get_score_explanation() -> Dict[str, str]:
        """Return short user-facing descriptions of each displayed metric."""
        return {
            "可信度": "内容可信度评估",
            "情感倾向": "文本的情感分析结果",
            "热度": "基于点赞和转发的热度分数",
        }


def _render_recommendations_html(raw_recommendations: Dict) -> str:
    """Render the result of ``get_recommendations`` as an HTML fragment.

    All dynamic values are HTML-escaped because the tweet text comes from an
    external data file and must not be interpreted as markup.

    Args:
        raw_recommendations: either the success dictionary (with
            "recommendations" and "score_explanation") or ``{"error": ...}``.

    Returns:
        An HTML string suitable for a ``gr.HTML`` component.
    """
    if "error" in raw_recommendations:
        message = html.escape(str(raw_recommendations["error"]))
        return f'<div class="recommendation-error">{message}</div>'

    parts = ['<div class="recommendations">']

    # Scoring explanation section.
    parts.append('<div class="score-explanation"><h3>评分说明</h3><ul>')
    explanation = raw_recommendations.get("score_explanation", {})
    for label, description in explanation.items():
        parts.append(
            f"<li>{html.escape(str(label))}: {html.escape(str(description))}</li>"
        )
    parts.append("</ul></div>")

    # One card per recommended tweet.
    for rec in raw_recommendations.get("recommendations", []):
        scores = {key: html.escape(str(value)) for key, value in rec["scores"].items()}
        parts.append(
            '<div class="tweet-card">'
            f'<p>{html.escape(str(rec["text"]))}</p>'
            "<p>"
            f'总分: {scores["总分"]} '
            f'可信度: {scores["可信度"]} '
            f'情感: {scores["情感倾向"]} '
            f'热度: {scores["热度"]} '
            f'{scores["互动数"]}'
            "</p>"
            "</div>"
        )

    parts.append("</div>")
    return "".join(parts)


def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Blocks:
    """Build the Gradio UI: three weight sliders, a button, and an HTML panel.

    Args:
        recommendation_system: the system queried when the button is clicked.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` application.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.Markdown("# 推文推荐系统\n\n调整权重以获取个性化推荐")

        with gr.Row():
            with gr.Column(scale=1):
                visibility_weight = gr.Slider(
                    0, 1, 0.5,
                    label="可信度权重",
                    info="调整对内容可信度的重视程度",
                )
                sentiment_weight = gr.Slider(
                    0, 1, 0.3,
                    label="情感倾向权重",
                    info="调整对情感倾向的重视程度",
                )
                popularity_weight = gr.Slider(
                    0, 1, 0.2,
                    label="热度权重",
                    info="调整对内容热度的重视程度",
                )
                submit_btn = gr.Button("获取推荐", variant="primary")

            with gr.Column(scale=2):
                output_html = gr.HTML()

        def on_submit(visibility: float, sentiment: float, popularity: float) -> str:
            """Query the recommender with the slider values and render HTML."""
            weights = RecommendationWeights(visibility, sentiment, popularity)
            return _render_recommendations_html(
                recommendation_system.get_recommendations(weights)
            )

        submit_btn.click(
            fn=on_submit,
            inputs=[visibility_weight, sentiment_weight, popularity_weight],
            outputs=output_html,
        )

    return interface


def main():
    """Load 'twitter_dataset.csv', build the UI and launch the Gradio server."""
    try:
        recommendation_system = RecommendationSystem(
            data_path=Path('twitter_dataset.csv')
        )
        interface = create_gradio_interface(recommendation_system)
        interface.launch()
    except Exception as e:
        # Top-level boundary: log for the operator, then let the error surface.
        logger.error("Application failed to start: %s", e)
        raise


if __name__ == "__main__":
    main()