reab5555 committed on
Commit
fd4c3a4
·
verified ·
1 Parent(s): 191a4f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -41
app.py CHANGED
@@ -173,7 +173,7 @@ def cluster_faces(face_images):
173
  X = X / 255.0
174
 
175
  # Perform DBSCAN clustering
176
- dbscan = DBSCAN(eps=0.3, min_samples=3, metric='euclidean')
177
  clusters = dbscan.fit_predict(X)
178
 
179
  # If DBSCAN assigns all to noise (-1), consider it as one cluster
@@ -238,6 +238,13 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
238
 
239
  return df, largest_cluster
240
 
 
 
 
 
 
 
 
241
  class LSTMAutoencoder(nn.Module):
242
  def __init__(self, input_size, hidden_size=64, num_layers=2):
243
  super(LSTMAutoencoder, self).__init__()
@@ -252,7 +259,7 @@ class LSTMAutoencoder(nn.Module):
252
  out = self.fc(outputs)
253
  return out
254
 
255
- def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
256
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
257
  X = torch.FloatTensor(X).to(device)
258
  if X.dim() == 2:
@@ -289,9 +296,7 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
289
  reconstructed = model(X).squeeze(0).cpu().numpy()
290
 
291
  mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
292
- top_indices_all = mse_all.argsort()[-num_anomalies:][::-1]
293
- anomalies_all = np.zeros(len(mse_all), dtype=bool)
294
- anomalies_all[top_indices_all] = True
295
 
296
  component_columns = [col for col in feature_columns if col.startswith('Comp')]
297
  component_indices = [feature_columns.index(col) for col in component_columns]
@@ -302,15 +307,13 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
302
  else:
303
  mse_comp = mse_all
304
 
305
- top_indices_comp = mse_comp.argsort()[-num_anomalies:][::-1]
306
- anomalies_comp = np.zeros(len(mse_comp), dtype=bool)
307
- anomalies_comp[top_indices_comp] = True
308
 
309
  return (anomalies_all, mse_all, top_indices_all,
310
  anomalies_comp, mse_comp, top_indices_comp,
311
  model)
312
 
313
- def emotion_anomaly_detection(emotion_data, num_anomalies=10, epochs=100, batch_size=64):
314
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
315
  X = torch.FloatTensor(emotion_data.values).to(device)
316
  if X.dim() == 1:
@@ -335,9 +338,7 @@ def emotion_anomaly_detection(emotion_data, num_anomalies=10, epochs=100, batch_
335
  reconstructed = model(X).squeeze(0).cpu().numpy()
336
 
337
  mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
338
- top_indices = mse.argsort()[-num_anomalies:][::-1]
339
- anomalies = np.zeros(len(mse), dtype=bool)
340
- anomalies[top_indices] = True
341
 
342
  return anomalies, mse, top_indices
343
 
@@ -350,7 +351,7 @@ def normalize_scores(scores):
350
 
351
 
352
  def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
353
- plt.figure(figsize=(16, 8), dpi=400)
354
  fig, ax = plt.subplots(figsize=(16, 8))
355
 
356
  df['Seconds'] = df['Timecode'].apply(
@@ -379,7 +380,7 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
379
 
380
  max_seconds = df['Seconds'].max()
381
  ax.set_xlim(0, max_seconds)
382
- num_ticks = 80
383
  ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
384
  ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
385
  rotation=90, ha='center', va='top')
@@ -392,8 +393,8 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
392
  plt.tight_layout()
393
  return fig
394
 
395
- def plot_emotion(df, emotion, anomaly_scores, top_indices, num_anomalies, color, timecodes):
396
- plt.figure(figsize=(16, 8), dpi=400)
397
  fig, ax = plt.subplots(figsize=(16, 8))
398
 
399
  df['Seconds'] = df['Timecode'].apply(
@@ -419,38 +420,45 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, num_anomalies, color,
419
 
420
  max_seconds = df['Seconds'].max()
421
  ax.set_xlim(0, max_seconds)
422
- num_ticks = 80
423
  ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
424
  ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
425
  rotation=90, ha='center', va='top')
426
 
427
  ax.set_xlabel('Time')
428
  ax.set_ylabel(f'{emotion.capitalize()} Anomaly Score')
429
- ax.set_title(f'{emotion.capitalize()} Anomaly Scores (Top {num_anomalies} in Red)')
430
 
431
  ax.grid(True, linestyle='--', alpha=0.7)
432
  plt.tight_layout()
433
  return fig
434
 
435
- def get_random_face_samples(organized_faces_folder, output_folder):
436
  face_samples = []
437
  for cluster_folder in os.listdir(organized_faces_folder):
438
  if cluster_folder.startswith("person_"):
439
  person_folder = os.path.join(organized_faces_folder, cluster_folder)
440
  face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
441
  if face_files:
442
- random_face = np.random.choice(face_files)
443
- face_path = os.path.join(person_folder, random_face)
444
- output_path = os.path.join(output_folder, f"face_sample_{cluster_folder}.jpg")
445
- face_img = cv2.imread(face_path)
446
- if face_img is not None:
447
- small_face = cv2.resize(face_img, (224, 224))
448
- cv2.imwrite(output_path, small_face)
449
- face_samples.append(output_path)
 
 
 
 
 
 
 
450
  return face_samples
451
 
452
 
453
- def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
454
  output_folder = "output"
455
  os.makedirs(output_folder, exist_ok=True)
456
 
@@ -490,13 +498,13 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
490
  progress(0.7, "Organizing faces")
491
  organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
492
 
493
- progress(0.75, "Getting face samples")
494
- face_samples = get_random_face_samples(organized_faces_folder, output_folder)
495
-
496
  progress(0.8, "Saving person data")
497
  df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
498
  original_fps, temp_dir, num_components, video_duration)
499
 
 
 
 
500
  progress(0.9, "Performing anomaly detection")
501
  feature_columns = [col for col in df.columns if
502
  col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
@@ -504,7 +512,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
504
 
505
  try:
506
  anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
507
- X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
508
 
509
  # Normalize anomaly scores
510
  anomaly_scores_all = normalize_scores(anomaly_scores_all)
@@ -513,7 +521,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
513
  # Perform anomaly detection for each emotion using LSTM autoencoder
514
  emotion_anomalies = {}
515
  for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
516
- anomalies, scores, indices = emotion_anomaly_detection(df[emotion], num_anomalies=num_anomalies)
517
  emotion_anomalies[emotion] = {
518
  'anomalies': anomalies,
519
  'scores': normalize_scores(scores),
@@ -534,7 +542,6 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
534
  plot_emotion(df, emotion,
535
  emotion_anomalies[emotion]['scores'],
536
  emotion_anomalies[emotion]['indices'],
537
- num_anomalies,
538
  color,
539
  df['Timecode'].iloc[emotion_anomalies[emotion]['indices']].values)
540
  for emotion, color in zip(['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral'],
@@ -548,17 +555,17 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
548
  results += f"Breakdown of persons/clusters:\n"
549
  for cluster_id in range(num_clusters):
550
  results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
551
- results += f"\nTop {num_anomalies} anomalies (Facial Features + Emotions):\n"
552
  results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
553
  zip(anomaly_scores_all[top_indices_all[1:]],
554
  df['Timecode'].iloc[top_indices_all[1:]].values)])
555
- results += f"\n\nTop {num_anomalies} anomalies (Facial Features):\n"
556
  results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
557
  zip(anomaly_scores_comp[top_indices_comp[1:]],
558
  df['Timecode'].iloc[top_indices_comp[1:]].values)])
559
 
560
  for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
561
- results += f"\n\nTop {num_anomalies} {emotion.capitalize()} Anomalies:\n"
562
  results += "\n".join([f"{emotion_anomalies[emotion]['scores'][i]:.2f} at {df['Timecode'].iloc[i]}"
563
  for i in emotion_anomalies[emotion]['indices'] if i > 0])
564
 
@@ -575,7 +582,6 @@ iface = gr.Interface(
575
  fn=process_video,
576
  inputs=[
577
  gr.Video(),
578
- gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Anomalies"),
579
  gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
580
  gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
581
  gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
@@ -590,15 +596,14 @@ iface = gr.Interface(
590
  gr.Plot(label="Happy Anomalies"),
591
  gr.Plot(label="Surprise Anomalies"),
592
  gr.Plot(label="Neutral Anomalies"),
593
- gr.Gallery(label="Detected Persons", columns=[2], rows=[1], height="auto")
594
  ],
595
  title="Facial Expressions Anomaly Detection",
596
  description="""
597
  This application detects anomalies in facial expressions and emotions from a video input.
598
- It identifies distinct persons in the video and provides a sample face for each.
599
 
600
  Adjust the parameters as needed:
601
- - Number of Anomalies: How many top anomalies or high intensities to highlight
602
  - Number of Components: Complexity of the facial expression model
603
  - Desired FPS: Frames per second to analyze (lower for faster processing)
604
  - Batch Size: Affects processing speed and memory usage
 
173
  X = X / 255.0
174
 
175
  # Perform DBSCAN clustering
176
+ dbscan = DBSCAN(eps=0.3, min_samples=10, metric='euclidean')
177
  clusters = dbscan.fit_predict(X)
178
 
179
  # If DBSCAN assigns all to noise (-1), consider it as one cluster
 
238
 
239
  return df, largest_cluster
240
 
241
def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
    """Flag the scores that lie strictly above ``mean + z_threshold * std``.

    Args:
        anomaly_scores: Array-like of per-sample anomaly scores
            (the callers in this file pass per-frame reconstruction MSE).
        z_threshold: Number of standard deviations (as computed by
            ``np.std``) above the mean a score must exceed to be flagged.

    Returns:
        A 2-tuple ``(mask, indices)``: ``mask`` is the element-wise result
        of ``anomaly_scores > cutoff`` and ``indices`` holds the positions
        along the first axis where ``mask`` is true, in ascending position
        order (not ordered by score).
    """
    # Single cut-off shared by every sample: the mean shifted upward by
    # z_threshold standard deviations.
    cutoff = np.mean(anomaly_scores) + z_threshold * np.std(anomaly_scores)
    flagged = anomaly_scores > cutoff
    flagged_positions = np.where(flagged)[0]
    return flagged, flagged_positions
247
+
248
  class LSTMAutoencoder(nn.Module):
249
  def __init__(self, input_size, hidden_size=64, num_layers=2):
250
  super(LSTMAutoencoder, self).__init__()
 
259
  out = self.fc(outputs)
260
  return out
261
 
262
+ def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
263
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
264
  X = torch.FloatTensor(X).to(device)
265
  if X.dim() == 2:
 
296
  reconstructed = model(X).squeeze(0).cpu().numpy()
297
 
298
  mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
299
+ anomalies_all, top_indices_all = determine_optimal_anomalies(mse_all)
 
 
300
 
301
  component_columns = [col for col in feature_columns if col.startswith('Comp')]
302
  component_indices = [feature_columns.index(col) for col in component_columns]
 
307
  else:
308
  mse_comp = mse_all
309
 
310
+ anomalies_comp, top_indices_comp = determine_optimal_anomalies(mse_comp)
 
 
311
 
312
  return (anomalies_all, mse_all, top_indices_all,
313
  anomalies_comp, mse_comp, top_indices_comp,
314
  model)
315
 
316
+ def emotion_anomaly_detection(emotion_data, epochs=100, batch_size=64):
317
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
318
  X = torch.FloatTensor(emotion_data.values).to(device)
319
  if X.dim() == 1:
 
338
  reconstructed = model(X).squeeze(0).cpu().numpy()
339
 
340
  mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
341
+ anomalies, top_indices = determine_optimal_anomalies(mse)
 
 
342
 
343
  return anomalies, mse, top_indices
344
 
 
351
 
352
 
353
  def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
354
+ plt.figure(figsize=(16, 8), dpi=500)
355
  fig, ax = plt.subplots(figsize=(16, 8))
356
 
357
  df['Seconds'] = df['Timecode'].apply(
 
380
 
381
  max_seconds = df['Seconds'].max()
382
  ax.set_xlim(0, max_seconds)
383
+ num_ticks = 100
384
  ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
385
  ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
386
  rotation=90, ha='center', va='top')
 
393
  plt.tight_layout()
394
  return fig
395
 
396
+ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
397
+ plt.figure(figsize=(16, 8), dpi=500)
398
  fig, ax = plt.subplots(figsize=(16, 8))
399
 
400
  df['Seconds'] = df['Timecode'].apply(
 
420
 
421
  max_seconds = df['Seconds'].max()
422
  ax.set_xlim(0, max_seconds)
423
+ num_ticks = 100
424
  ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
425
  ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
426
  rotation=90, ha='center', va='top')
427
 
428
  ax.set_xlabel('Time')
429
  ax.set_ylabel(f'{emotion.capitalize()} Anomaly Score')
430
+ ax.set_title(f'{emotion.capitalize()} Anomaly Scores')
431
 
432
  ax.grid(True, linestyle='--', alpha=0.7)
433
  plt.tight_layout()
434
  return fig
435
 
436
def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
    """Write random face thumbnails for every ``person_*`` folder.

    For the folder whose numeric suffix equals ``largest_cluster`` up to
    ``num_samples`` distinct ``.jpg`` files are drawn; every other person
    folder contributes a single random file. Each readable image is resized
    to 160x160 and written to ``output_folder`` as
    ``face_sample_<folder>_<i>.jpg``.

    Args:
        organized_faces_folder: Directory containing the ``person_*`` folders.
        output_folder: Directory the resized samples are written to.
        largest_cluster: Cluster id compared against each folder's suffix.
        num_samples: Upper bound on samples taken from the largest cluster.

    Returns:
        List of output paths that were written, grouped by person folder.
    """
    face_samples = []
    # os.listdir order is arbitrary; sorting keeps the output order stable and
    # makes the sampling reproducible under a seeded NumPy RNG.
    for cluster_folder in sorted(os.listdir(organized_faces_folder)):
        if not cluster_folder.startswith("person_"):
            continue
        person_folder = os.path.join(organized_faces_folder, cluster_folder)
        # A stray file named "person_*" would make os.listdir raise.
        if not os.path.isdir(person_folder):
            continue
        face_files = sorted(f for f in os.listdir(person_folder) if f.endswith('.jpg'))
        if not face_files:
            continue

        # A folder with a non-numeric suffix cannot be the largest cluster;
        # treat it as an ordinary person instead of crashing on int().
        try:
            cluster_id = int(cluster_folder.split('_')[1])
        except ValueError:
            cluster_id = None

        if cluster_id is not None and cluster_id == largest_cluster:
            # Up to num_samples distinct faces for the largest cluster
            # (clamped so a negative request yields no samples, not an error).
            sample_count = max(0, min(num_samples, len(face_files)))
            samples = np.random.choice(face_files, sample_count, replace=False)
        else:
            # One representative face for every other cluster.
            samples = [np.random.choice(face_files)]

        for i, sample in enumerate(samples):
            face_path = os.path.join(person_folder, sample)
            output_path = os.path.join(output_folder, f"face_sample_{cluster_folder}_{i}.jpg")
            face_img = cv2.imread(face_path)
            # cv2.imread returns None for unreadable files; skip those.
            if face_img is not None:
                small_face = cv2.resize(face_img, (160, 160))
                cv2.imwrite(output_path, small_face)
                face_samples.append(output_path)
    return face_samples
459
 
460
 
461
+ def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
462
  output_folder = "output"
463
  os.makedirs(output_folder, exist_ok=True)
464
 
 
498
  progress(0.7, "Organizing faces")
499
  organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
500
 
 
 
 
501
  progress(0.8, "Saving person data")
502
  df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
503
  original_fps, temp_dir, num_components, video_duration)
504
 
505
+ progress(0.85, "Getting face samples")
506
+ face_samples = get_random_face_samples(organized_faces_folder, output_folder, largest_cluster)
507
+
508
  progress(0.9, "Performing anomaly detection")
509
  feature_columns = [col for col in df.columns if
510
  col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
 
512
 
513
  try:
514
  anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
515
+ X, feature_columns, batch_size=batch_size)
516
 
517
  # Normalize anomaly scores
518
  anomaly_scores_all = normalize_scores(anomaly_scores_all)
 
521
  # Perform anomaly detection for each emotion using LSTM autoencoder
522
  emotion_anomalies = {}
523
  for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
524
+ anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
525
  emotion_anomalies[emotion] = {
526
  'anomalies': anomalies,
527
  'scores': normalize_scores(scores),
 
542
  plot_emotion(df, emotion,
543
  emotion_anomalies[emotion]['scores'],
544
  emotion_anomalies[emotion]['indices'],
 
545
  color,
546
  df['Timecode'].iloc[emotion_anomalies[emotion]['indices']].values)
547
  for emotion, color in zip(['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral'],
 
555
  results += f"Breakdown of persons/clusters:\n"
556
  for cluster_id in range(num_clusters):
557
  results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
558
+ results += f"\nAnomalies (Facial Features + Emotions):\n"
559
  results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
560
  zip(anomaly_scores_all[top_indices_all[1:]],
561
  df['Timecode'].iloc[top_indices_all[1:]].values)])
562
+ results += f"\n\nAnomalies (Facial Features):\n"
563
  results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
564
  zip(anomaly_scores_comp[top_indices_comp[1:]],
565
  df['Timecode'].iloc[top_indices_comp[1:]].values)])
566
 
567
  for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
568
+ results += f"\n\n{emotion.capitalize()} Anomalies:\n"
569
  results += "\n".join([f"{emotion_anomalies[emotion]['scores'][i]:.2f} at {df['Timecode'].iloc[i]}"
570
  for i in emotion_anomalies[emotion]['indices'] if i > 0])
571
 
 
582
  fn=process_video,
583
  inputs=[
584
  gr.Video(),
 
585
  gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
586
  gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
587
  gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
 
596
  gr.Plot(label="Happy Anomalies"),
597
  gr.Plot(label="Surprise Anomalies"),
598
  gr.Plot(label="Neutral Anomalies"),
599
+ gr.Gallery(label="Detected Persons", columns=[5], rows=[2], height="auto")
600
  ],
601
  title="Facial Expressions Anomaly Detection",
602
  description="""
603
  This application detects anomalies in facial expressions and emotions from a video input.
604
+ It identifies distinct persons in the video and provides sample faces for each, with 10 samples for the most frequent person.
605
 
606
  Adjust the parameters as needed:
 
607
  - Number of Components: Complexity of the facial expression model
608
  - Desired FPS: Frames per second to analyze (lower for faster processing)
609
  - Batch Size: Affects processing speed and memory usage