import gradio as gr
import re
from collections import Counter
from datetime import datetime
import emoji
from transformers import pipeline
import logging
from typing import Tuple, List, Optional

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CommentAnalyzer:
    def __init__(self):
        """Initialize the analyzer with sentiment model and compile regex patterns"""
        try:
            self.sentiment_model = pipeline("sentiment-analysis")
        except Exception as e:
            logger.error(f"Failed to load sentiment model: {e}")
            raise

        # Compile regex patterns for better performance
        self.mention_pattern = re.compile(r'@[\w\.]+')
        self.comment_pattern = re.compile(
            r'Фото профиля\s+(.+?)\s+'               # Username
            r'((?:(?!Фото профиля).)+?)\s+'          # Comment text
            r'(\d+)?\s*(?:нравится|like[s]?)?\s*'    # Likes count
            r'(\d+)\s*(?:н|w)',                      # Week number
            re.DOTALL
        )

    def clean_text(self, text: str) -> str:
        """Clean text by removing extra whitespace and normalizing line breaks"""
        return ' '.join(text.split())

    def count_emojis(self, text: str) -> int:
        """Count the number of emoji characters in text"""
        return len([c for c in text if c in emoji.EMOJI_DATA])

    def extract_mentions(self, text: str) -> List[str]:
        """Extract @mentions from text"""
        return self.mention_pattern.findall(text)

    def analyze_sentiment(self, text: str) -> str:
        """Analyze text sentiment using the loaded model"""
        try:
            result = self.sentiment_model(text[:512])  # Limit text length for model
            sentiment = result[0]['label']
            if sentiment == 'POSITIVE':
                return 'positive'
            elif sentiment == 'NEGATIVE':
                return 'negative'
            return 'neutral'
        except Exception as e:
            logger.warning(f"Sentiment analysis failed: {e}")
            return 'neutral'

    def extract_comment_data(self, comment_block: str) -> Tuple[Optional[str], Optional[str], int, int]:
        """
        Extract structured data from a comment block

        Returns: (username, comment_text, likes_count, week_number)
        """
        match = self.comment_pattern.search(comment_block)
        if not match:
            return None, None, 0, 0

        username, comment, likes, week = match.groups()
        return (
            username.strip(),
            self.clean_text(comment),
            int(likes or 0),
            int(week or 0)
        )

    def analyze_post(self, content_type: str, link_to_post: str, post_likes: int,
                     post_date: str, description: str, comment_count: int,
                     all_comments: str) -> Tuple[str, str, str, str, str]:
        """
        Analyze Instagram post comments and generate comprehensive analytics

        Args:
            content_type: Type of content ("Photo" or "Video")
            link_to_post: URL of the post
            post_likes: Number of likes on the post
            post_date: Date of post publication
            description: Post description/caption
            comment_count: Total number of comments
            all_comments: Raw text containing all comments

        Returns:
            Tuple containing:
            - Analytics summary
            - List of usernames
            - List of comments
            - Chronological list of likes
            - Total likes count
        """
        try:
            # Split comments into blocks
            comments_blocks = [
                block for block in re.split(r'(?=Фото профиля)', all_comments)
                if block.strip()
            ]

            # Initialize data containers
            data = {
                'usernames': [],
                'comments': [],
                'likes': [],
                'weeks': [],
                'emojis': 0,
                'mentions': [],
                'sentiments': [],
                'lengths': []
            }

            # Process each comment block
            for block in comments_blocks:
                username, comment, like_count, week = self.extract_comment_data(block)
                if username and comment:
                    data['usernames'].append(username)
                    data['comments'].append(comment)
                    data['likes'].append(like_count)
                    data['weeks'].append(week)

                    # Collect metrics
                    data['emojis'] += self.count_emojis(comment)
                    data['mentions'].extend(self.extract_mentions(comment))
                    data['sentiments'].append(self.analyze_sentiment(comment))
                    data['lengths'].append(len(comment))

            # Calculate analytics
            total_comments = len(data['comments'])
            if total_comments == 0:
                raise ValueError("No valid comments found in input")

            analytics = {
                'avg_length': sum(data['lengths']) / total_comments,
                'sentiment_dist': Counter(data['sentiments']),
                'active_users': Counter(data['usernames']).most_common(5),
                'top_mentions': Counter(data['mentions']).most_common(5),
                'avg_likes': sum(data['likes']) / total_comments,
                'weeks_range': (min(data['weeks']), max(data['weeks'])),
                'total_likes': sum(data['likes'])
            }

            # Generate summary
            summary = self._format_analytics_summary(
                content_type, link_to_post, data, analytics, total_comments
            )

            return (
                summary,
                '\n'.join(data['usernames']),
                '\n'.join(data['comments']),
                '\n'.join(map(str, data['likes'])),
                str(analytics['total_likes'])
            )

        except Exception as e:
            logger.error(f"Error analyzing post: {e}", exc_info=True)
            return (f"Error during analysis: {str(e)}", "", "", "", "0")

    def _format_analytics_summary(self, content_type, link, data, analytics, total_comments):
        """Format analytics data into a readable summary"""
        return f"""
Content Type: {content_type}
Link to Post: {link}

ОСНОВНАЯ СТАТИСТИКА:
- Всего комментариев: {total_comments}
- Всего лайков на комментариях: {analytics['total_likes']}
- Среднее количество лайков: {analytics['avg_likes']:.1f}
- Период активности: {analytics['weeks_range'][0]}-{analytics['weeks_range'][1]} недель

АНАЛИЗ КОНТЕНТА:
- Средняя длина комментария: {analytics['avg_length']:.1f} символов
- Всего эмодзи использовано: {data['emojis']}
- Тональность комментариев:
  * Позитивных: {analytics['sentiment_dist']['positive']}
  * Нейтральных: {analytics['sentiment_dist']['neutral']}
  * Негативных: {analytics['sentiment_dist']['negative']}

АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:
Самые активные комментаторы:
{chr(10).join(f"- {user}: {count} комментариев" for user, count in analytics['active_users'])}

Самые упоминаемые пользователи:
{chr(10).join(f"- {user}: {count} упоминаний" for user, count in analytics['top_mentions'] if user)}

ВОВЛЕЧЕННОСТЬ:
- Процент комментариев с лайками: {(sum(1 for l in data['likes'] if l > 0) / total_comments * 100):.1f}%
- Процент комментариев с эмодзи: {(sum(1 for c in data['comments'] if self.count_emojis(c) > 0) / total_comments * 100):.1f}%
"""


def create_interface():
    """Create and configure the Gradio interface"""
    analyzer = CommentAnalyzer()

    iface = gr.Interface(
        fn=analyzer.analyze_post,
        inputs=[
            gr.Radio(
                choices=["Photo", "Video"],
                label="Content Type",
                value="Photo"
            ),
            gr.Textbox(
                label="Link to Post",
                placeholder="Введите ссылку на пост"
            ),
            gr.Number(
                label="Likes",
                value=0
            ),
            gr.Textbox(
                label="Post Date",
                placeholder="Введите дату публикации"
            ),
            gr.Textbox(
                label="Description",
                placeholder="Введите описание поста",
                lines=3
            ),
            gr.Number(
                label="Total Comment Count",
                value=0
            ),
            gr.Textbox(
                label="All Comments",
                placeholder="Вставьте комментарии",
                lines=10
            )
        ],
        outputs=[
            gr.Textbox(label="Analytics Summary", lines=20),
            gr.Textbox(label="Usernames (Output 1)", lines=5),
            gr.Textbox(label="Comments (Output 2)", lines=5),
            gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
            gr.Textbox(label="Total Likes on Comments (Output 4)")
        ],
        title="Instagram Comment Analyzer Pro",
        description="Расширенный анализатор комментариев Instagram с детальной аналитикой"
    )

    return iface


if __name__ == "__main__":
    iface = create_interface()
    iface.launch()
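

# Sketch of the raw "All Comments" text that comment_pattern above is written to
# parse. This layout is inferred from the regex itself (block marker "Фото профиля",
# a likes line ending in "нравится"/"likes", a week marker "н"/"w"); the exact text
# copied from Instagram may differ, so treat this as an illustrative assumption:
#
#   Фото профиля some_user
#   Great post! 👍 @friend_account
#   12 нравится
#   3 н
#
# Each block starts with "Фото профиля" followed by the username, then the comment
# text, an optional likes count, and the week number that closes the block.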