boompack committed on
Commit
e227756
·
verified ·
1 Parent(s): b4d3853

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -97
app.py CHANGED
@@ -163,109 +163,109 @@ class InstagramCommentAnalyzer:
163
  logger.error(f"Sentiment analysis failed: {str(e)}")
164
  return "UNKNOWN"
165
  def process_comment(self, text: str, parent_id: Optional[str] = None, level: int = 0) -> Optional[Comment]:
166
- """Обработка отдельного комментария"""
167
- if not self.validate_input(text):
168
- return None
169
-
170
- if level > self.max_depth:
171
- logger.warning(f"Maximum depth {self.max_depth} exceeded")
172
- self.stats['max_depth_reached'] += 1
173
- return None
174
-
175
- try:
176
- text = self.normalize_text(text)
177
- match = self.pattern.match(text)
178
-
179
- if not match:
180
- alt_match = self.alternative_parse(text)
181
- if not alt_match:
182
- raise ValueError(f"Could not parse comment: {text[:100]}...")
183
- match = alt_match
184
-
185
- data = match.groupdict()
186
- comment = Comment(
187
- username=data['username'].strip(),
188
- time=self.normalize_time(data['time']),
189
- content=self.clean_content(data['content']),
190
- likes=self.parse_likes(data.get('likes', '0')),
191
- level=level,
192
- parent_id=parent_id
193
- )
194
 
195
- # Анализ тональности и метаданных
196
- comment.sentiment = self.analyze_sentiment(comment.content)
197
- self.extract_metadata(comment)
198
-
199
- self.stats['total_comments'] += 1
200
- return comment
201
 
202
- except Exception as e:
203
- logger.error(f"Error processing comment: {str(e)}", exc_info=True)
204
- self.stats['failed_parses'] += 1
205
- return self.create_damaged_comment()
206
-
207
- def alternative_parse(self, text: str) -> Optional[re.Match]:
208
- """Альтернативный метод парсинга для сложных случаев"""
209
- alternative_patterns = [
210
- # Более простой паттерн
211
- r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+\.?)(?P<likes>\d+)?',
212
- # Паттерн для мобильной версии
213
- r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+)(?:Like)?(?P<likes>\d+)?'
214
- ]
215
 
216
- for pattern in alternative_patterns:
217
- try:
218
- match = re.compile(pattern, re.VERBOSE | re.DOTALL).match(text)
219
- if match:
220
- return match
221
- except Exception:
222
- continue
223
- return None
224
-
225
- def parse_likes(self, likes_str: str) -> int:
226
- """Безопасный парсинг количества лайков"""
227
- try:
228
- return int(re.sub(r'\D', '', likes_str) or 0)
229
- except (ValueError, TypeError):
230
- return 0
231
-
232
- def create_damaged_comment(self) -> Comment:
233
- """Создание заглушки для поврежденного комментария"""
234
- return Comment(
235
- username="[damaged]",
236
- time="unknown",
237
- content="[Поврежденные данные]",
238
- is_deleted=True
239
  )
240
 
241
- def validate_input(self, text: str) -> bool:
242
- """Валидация входного текста"""
243
- if not text or not isinstance(text, str):
244
- logger.error("Invalid input: text must be non-empty string")
245
- return False
246
- if len(text) > 50000:
247
- logger.error("Input text too large")
248
- return False
249
- return True
250
-
251
- def format_comment(self, comment: Comment, index: int) -> str:
252
- """Форматирование комментария для вывода"""
 
 
 
 
 
 
 
 
 
 
253
  try:
254
- if comment.is_deleted:
255
- return f'{index}. "[УДАЛЕНО]"'
256
-
257
- emoji_str = ' '.join(comment.emojis) if comment.emojis else ''
258
- mentions_str = ', '.join(comment.mentions) if comment.mentions else ''
259
- hashtags_str = ', '.join(comment.hashtags) if comment.hashtags else ''
260
-
261
- return (
262
- f'{index}. "{comment.username}" "{comment.time}" '
263
- f'"{comment.content}" "Лайки: {comment.likes}" '
264
- f'"Настроение: {comment.sentiment}" '
265
- f'"Эмодзи: {emoji_str}" '
266
- f'"Упоминания: {mentions_str}" '
267
- f'"Хэштеги: {hashtags_str}"'
268
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  except Exception as e:
270
  logger.error(f"Error formatting comment: {str(e)}")
271
  return f'{index}. "[ОШИБКА ФОРМАТИРОВАНИЯ]"'
 
163
  logger.error(f"Sentiment analysis failed: {str(e)}")
164
  return "UNKNOWN"
165
  def process_comment(self, text: str, parent_id: Optional[str] = None, level: int = 0) -> Optional[Comment]:
166
+ """Обработка отдельного комментария"""
167
+ if not self.validate_input(text):
168
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
+ if level > self.max_depth:
171
+ logger.warning(f"Maximum depth {self.max_depth} exceeded")
172
+ self.stats['max_depth_reached'] += 1
173
+ return None
 
 
174
 
175
+ try:
176
+ text = self.normalize_text(text)
177
+ match = self.pattern.match(text)
 
 
 
 
 
 
 
 
 
 
178
 
179
+ if not match:
180
+ alt_match = self.alternative_parse(text)
181
+ if not alt_match:
182
+ raise ValueError(f"Could not parse comment: {text[:100]}...")
183
+ match = alt_match
184
+
185
+ data = match.groupdict()
186
+ comment = Comment(
187
+ username=data['username'].strip(),
188
+ time=self.normalize_time(data['time']),
189
+ content=self.clean_content(data['content']),
190
+ likes=self.parse_likes(data.get('likes', '0')),
191
+ level=level,
192
+ parent_id=parent_id
 
 
 
 
 
 
 
 
 
193
  )
194
 
195
+ # Анализ тональности и метаданных
196
+ comment.sentiment = self.analyze_sentiment(comment.content)
197
+ self.extract_metadata(comment)
198
+
199
+ self.stats['total_comments'] += 1
200
+ return comment
201
+
202
+ except Exception as e:
203
+ logger.error(f"Error processing comment: {str(e)}", exc_info=True)
204
+ self.stats['failed_parses'] += 1
205
+ return self.create_damaged_comment()
206
+
207
+ def alternative_parse(self, text: str) -> Optional[re.Match]:
208
+ """Альтернативный метод парсинга для сложных случаев"""
209
+ alternative_patterns = [
210
+ # Более простой паттерн
211
+ r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+\.?)(?P<likes>\d+)?',
212
+ # Паттерн для мобильной версии
213
+ r'(?P<username>[\w\u0400-\u04FF.-]+)\s*(?P<content>.*?)(?P<time>\d+\s+\w+)(?:Like)?(?P<likes>\d+)?'
214
+ ]
215
+
216
+ for pattern in alternative_patterns:
217
  try:
218
+ match = re.compile(pattern, re.VERBOSE | re.DOTALL).match(text)
219
+ if match:
220
+ return match
221
+ except Exception:
222
+ continue
223
+ return None
224
+
225
+ def parse_likes(self, likes_str: str) -> int:
226
+ """Безопасный парсинг количества лайков"""
227
+ try:
228
+ return int(re.sub(r'\D', '', likes_str) or 0)
229
+ except (ValueError, TypeError):
230
+ return 0
231
+
232
+ def create_damaged_comment(self) -> Comment:
233
+ """Создание заглушки для поврежденного комментария"""
234
+ return Comment(
235
+ username="[damaged]",
236
+ time="unknown",
237
+ content="[Поврежденные данные]",
238
+ is_deleted=True
239
+ )
240
+
241
+ def validate_input(self, text: str) -> bool:
242
+ """Валидация входного текста"""
243
+ if not text or not isinstance(text, str):
244
+ logger.error("Invalid input: text must be non-empty string")
245
+ return False
246
+ if len(text) > 50000:
247
+ logger.error("Input text too large")
248
+ return False
249
+ return True
250
+
251
+ def format_comment(self, comment: Comment, index: int) -> str:
252
+ """Форматирование комментария для вывода"""
253
+ try:
254
+ if comment.is_deleted:
255
+ return f'{index}. "[УДАЛЕНО]"'
256
+
257
+ emoji_str = ' '.join(comment.emojis) if comment.emojis else ''
258
+ mentions_str = ', '.join(comment.mentions) if comment.mentions else ''
259
+ hashtags_str = ', '.join(comment.hashtags) if comment.hashtags else ''
260
+
261
+ return (
262
+ f'{index}. "{comment.username}" "{comment.time}" '
263
+ f'"{comment.content}" "Лайки: {comment.likes}" '
264
+ f'"Настроение: {comment.sentiment}" '
265
+ f'"Эмодзи: {emoji_str}" '
266
+ f'"Упоминания: {mentions_str}" '
267
+ f'"Хэштеги: {hashtags_str}"'
268
+ )
269
  except Exception as e:
270
  logger.error(f"Error formatting comment: {str(e)}")
271
  return f'{index}. "[ОШИБКА ФОРМАТИРОВАНИЯ]"'