Spaces:

boompack
/

hy

Sleeping

App Files Files Community

boompack committed on Nov 7, 2024

Commit

e227756

verified ·

1 Parent(s): b4d3853

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -97

app.py CHANGED Viewed

@@ -163,109 +163,109 @@ class InstagramCommentAnalyzer:
             logger.error(f"Sentiment analysis failed: {str(e)}")
             return "UNKNOWN"
 def process_comment(self, text: str, parent_id: Optional[str] = None, level: int = 0) -> Optional[Comment]:
-        """Parse one raw comment into a Comment (None if input is invalid or too deep; a damaged stub on errors)."""
-        if not self.validate_input(text):
-            return None
-        if level > self.max_depth:
-            logger.warning(f"Maximum depth {self.max_depth} exceeded")
-            self.stats['max_depth_reached'] += 1
-            return None
-        try:
-            text = self.normalize_text(text)
-            match = self.pattern.match(text)
-            if not match:  # primary pattern failed; try the fallback patterns
-                alt_match = self.alternative_parse(text)
-                if not alt_match:
-                    raise ValueError(f"Could not parse comment: {text[:100]}...")  # caught by the except below
-                match = alt_match
-            data = match.groupdict()
-            comment = Comment(
-                username=data['username'].strip(),
-                time=self.normalize_time(data['time']),
-                content=self.clean_content(data['content']),
-                likes=self.parse_likes(data.get('likes', '0')),
-                level=level,
-                parent_id=parent_id
-            )
-            # Sentiment analysis and metadata
-            comment.sentiment = self.analyze_sentiment(comment.content)
-            self.extract_metadata(comment)
-            self.stats['total_comments'] += 1
-            return comment
-        except Exception as e:
-            logger.error(f"Error processing comment: {str(e)}", exc_info=True)
-            self.stats['failed_parses'] += 1
-            return self.create_damaged_comment()
-   def alternative_parse(self, text: str) -> Optional[re.Match]:  # NOTE(review): this def is indented 3 columns, the following defs 4 — confirm
-        """Fallback parser for hard cases: return the first alternative pattern's match, or None."""
-        alternative_patterns = [
-            # Simpler pattern
-            r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+\.?)(?P<likes>\d+)?',
-            # Pattern for the mobile version
-            r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+)(?:Like)?(?P<likes>\d+)?'
-        ]
-        for pattern in alternative_patterns:
-            try:
-                match = re.compile(pattern, re.VERBOSE | re.DOTALL).match(text)
-                if match:
-                    return match
-            except Exception:  # best-effort: a failing pattern is skipped, not reported
-                continue
-        return None
-    def parse_likes(self, likes_str: str) -> int:
-        """Safely parse the like count: strip non-digits and return an int, or 0 on failure."""
-        try:
-            return int(re.sub(r'\D', '', likes_str) or 0)  # empty result after stripping falls back to 0
-        except (ValueError, TypeError):  # TypeError: e.g. likes_str is None rather than a str
-            return 0
-    def create_damaged_comment(self) -> Comment:
-        """Create a placeholder Comment for damaged data, flagged with is_deleted=True."""
-        return Comment(
-            username="[damaged]",
-            time="unknown",
-            content="[Поврежденные данные]",
-            is_deleted=True
         )
-    def validate_input(self, text: str) -> bool:
-        """Validate input text: it must be a non-empty str of at most 50000 characters."""
-        if not text or not isinstance(text, str):
-            logger.error("Invalid input: text must be non-empty string")
-            return False
-        if len(text) > 50000:
-            logger.error("Input text too large")
-            return False
-        return True
-    def format_comment(self, comment: Comment, index: int) -> str:
-        """Format a comment as one numbered output line; deleted comments and errors get a stub line."""
         try:
-            if comment.is_deleted:
-                return f'{index}. "[УДАЛЕНО]"'
-            emoji_str = ' '.join(comment.emojis) if comment.emojis else ''
-            mentions_str = ', '.join(comment.mentions) if comment.mentions else ''
-            hashtags_str = ', '.join(comment.hashtags) if comment.hashtags else ''
-            return (
-                f'{index}. "{comment.username}" "{comment.time}" '
-                f'"{comment.content}" "Лайки: {comment.likes}" '
-                f'"Настроение: {comment.sentiment}" '
-                f'"Эмодзи: {emoji_str}" '
-                f'"Упоминания: {mentions_str}" '
-                f'"Хэштеги: {hashtags_str}"'
-            )
         except Exception as e:
             logger.error(f"Error formatting comment: {str(e)}")
             return f'{index}. "[ОШИБКА ФОРМАТИРОВАНИЯ]"'

             logger.error(f"Sentiment analysis failed: {str(e)}")
             return "UNKNOWN"
 def process_comment(self, text: str, parent_id: Optional[str] = None, level: int = 0) -> Optional[Comment]:
+    """Parse one raw comment into a Comment (None if input is invalid or too deep; a damaged stub on errors)."""
+    if not self.validate_input(text):
+        return None
+    if level > self.max_depth:
+        logger.warning(f"Maximum depth {self.max_depth} exceeded")
+        self.stats['max_depth_reached'] += 1
+        return None
+    try:
+        text = self.normalize_text(text)
+        match = self.pattern.match(text)
+        if not match:  # primary pattern failed; try the fallback patterns
+            alt_match = self.alternative_parse(text)
+            if not alt_match:
+                raise ValueError(f"Could not parse comment: {text[:100]}...")  # caught by the except below
+            match = alt_match
+        data = match.groupdict()
+        comment = Comment(
+            username=data['username'].strip(),
+            time=self.normalize_time(data['time']),
+            content=self.clean_content(data['content']),
+            likes=self.parse_likes(data.get('likes', '0')),
+            level=level,
+            parent_id=parent_id
         )
+        # Sentiment analysis and metadata
+        comment.sentiment = self.analyze_sentiment(comment.content)
+        self.extract_metadata(comment)
+        self.stats['total_comments'] += 1
+        return comment
+    except Exception as e:
+        logger.error(f"Error processing comment: {str(e)}", exc_info=True)
+        self.stats['failed_parses'] += 1
+        return self.create_damaged_comment()
+def alternative_parse(self, text: str) -> Optional[re.Match]:
+    """Fallback parser for hard cases: return the first alternative pattern's match, or None."""
+    alternative_patterns = [
+        # Simpler pattern
+        r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+\.?)(?P<likes>\d+)?',
+        # Pattern for the mobile version
+        r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+)(?:Like)?(?P<likes>\d+)?'
+    ]
+    for pattern in alternative_patterns:
         try:
+            match = re.compile(pattern, re.VERBOSE | re.DOTALL).match(text)
+            if match:
+                return match
+        except Exception:  # best-effort: a failing pattern is skipped, not reported
+            continue
+    return None
+def parse_likes(self, likes_str: str) -> int:
+    """Safely parse the like count: strip non-digits and return an int, or 0 on failure."""
+    try:
+        return int(re.sub(r'\D', '', likes_str) or 0)  # empty result after stripping falls back to 0
+    except (ValueError, TypeError):  # TypeError: e.g. likes_str is None rather than a str
+        return 0
+def create_damaged_comment(self) -> Comment:
+    """Create a placeholder Comment for damaged data, flagged with is_deleted=True."""
+    return Comment(
+        username="[damaged]",
+        time="unknown",
+        content="[Поврежденные данные]",
+        is_deleted=True
+    )
+def validate_input(self, text: str) -> bool:
+    """Validate input text: it must be a non-empty str of at most 50000 characters."""
+    if not text or not isinstance(text, str):
+        logger.error("Invalid input: text must be non-empty string")
+        return False
+    if len(text) > 50000:
+        logger.error("Input text too large")
+        return False
+    return True
+def format_comment(self, comment: Comment, index: int) -> str:
+    """Format a comment as one numbered output line; deleted comments and errors get a stub line."""
+    try:  # NOTE(review): as rendered, the matching except below sits 4 columns deeper than this try — confirm the file still parses
+        if comment.is_deleted:
+            return f'{index}. "[УДАЛЕНО]"'
+        emoji_str = ' '.join(comment.emojis) if comment.emojis else ''
+        mentions_str = ', '.join(comment.mentions) if comment.mentions else ''
+        hashtags_str = ', '.join(comment.hashtags) if comment.hashtags else ''
+        return (
+            f'{index}. "{comment.username}" "{comment.time}" '
+            f'"{comment.content}" "Лайки: {comment.likes}" '
+            f'"Настроение: {comment.sentiment}" '
+            f'"Эмодзи: {emoji_str}" '
+            f'"Упоминания: {mentions_str}" '
+            f'"Хэштеги: {hashtags_str}"'
+        )
         except Exception as e:
             logger.error(f"Error formatting comment: {str(e)}")
             return f'{index}. "[ОШИБКА ФОРМАТИРОВАНИЯ]"'