reab5555 committed on
Commit
21dc0af
·
verified ·
1 Parent(s): 86bd3cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -59
app.py CHANGED
@@ -35,8 +35,7 @@ matplotlib.rcParams['savefig.dpi'] = 400
35
  # Initialize models and other global variables
36
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
37
 
38
- mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.98, 0.98, 0.98], min_face_size=50,
39
- selection_method='largest')
40
  model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
41
  mp_face_mesh = mp.solutions.face_mesh
42
  face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
@@ -156,7 +155,6 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
156
 
157
  return embeddings_by_frame, emotions_by_frame, aligned_face_paths
158
 
159
-
160
  def cluster_faces(embeddings):
161
  if len(embeddings) < 2:
162
  print("Not enough faces for clustering. Assigning all to one cluster.")
@@ -228,7 +226,7 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
228
 
229
  return df, largest_cluster
230
 
231
- def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
232
  mean = np.mean(anomaly_scores)
233
  std = np.std(anomaly_scores)
234
  threshold = mean + z_threshold * std
@@ -239,7 +237,7 @@ def timecode_to_seconds(timecode):
239
  h, m, s = map(float, timecode.split(':'))
240
  return h * 3600 + m * 60 + s
241
 
242
- def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
243
  grouped = []
244
  current_group = []
245
 
@@ -282,7 +280,7 @@ def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
282
 
283
  print(f"X shape after reshaping: {X.shape}")
284
 
285
- train_size = int(0.85 * X.shape[1])
286
  X_train, X_val = X[:, :train_size, :], X[:, train_size:, :]
287
 
288
  model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
@@ -366,6 +364,34 @@ def plot_to_image(fig):
366
  buf.seek(0)
367
  return buf
368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
370
  plt.figure(figsize=(16, 8), dpi=300)
371
  fig, ax = plt.subplots(figsize=(16, 8))
@@ -373,25 +399,25 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
373
  df['Seconds'] = df['Timecode'].apply(
374
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
375
 
376
- normalized_scores = normalize_scores(anomaly_scores)
377
-
378
- seconds = df['Seconds'].values[1:]
379
- scores = normalized_scores[1:]
380
 
381
  ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
382
 
383
- top_indices = [idx for idx in top_indices if idx > 0]
384
- ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
385
 
386
  # Calculate and plot baseline
387
- non_anomalous_scores = np.delete(normalized_scores, top_indices)
388
  baseline = np.mean(non_anomalous_scores)
389
  ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
390
  ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
391
  verticalalignment='bottom', horizontalalignment='right', color='black')
392
 
393
  grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
394
- normalized_scores[top_indices])
395
 
396
  for group in grouped_timecodes:
397
  max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
@@ -424,12 +450,14 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
424
  df['Seconds'] = df['Timecode'].apply(
425
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
426
 
427
- seconds = df['Seconds'].values[1:]
428
- scores = anomaly_scores[1:]
 
 
429
 
430
  ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
431
 
432
- top_indices = [idx for idx in top_indices if idx > 0]
433
  ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
434
 
435
  # Calculate and plot baseline
@@ -467,28 +495,33 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
467
  plt.close()
468
  return fig
469
 
470
- def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
471
- face_samples = []
472
- for cluster_folder in os.listdir(organized_faces_folder):
473
  if cluster_folder.startswith("person_"):
474
  person_folder = os.path.join(organized_faces_folder, cluster_folder)
475
- face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
476
  if face_files:
477
- if int(cluster_folder.split('_')[1]) == largest_cluster:
478
- samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
 
 
 
 
 
 
 
 
479
  else:
480
- samples = [np.random.choice(face_files)]
481
-
482
- for i, sample in enumerate(samples):
483
- face_path = os.path.join(person_folder, sample)
484
- output_path = os.path.join(output_folder, f"face_sample_{cluster_folder}_{i}.jpg")
485
- face_img = cv2.imread(face_path)
486
- if face_img is not None:
487
- small_face = cv2.resize(face_img, (160, 160))
488
- cv2.imwrite(output_path, small_face)
489
- face_samples.append(output_path)
490
  return face_samples
491
-
492
  def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
493
  output_folder = "output"
494
  os.makedirs(output_folder, exist_ok=True)
@@ -519,7 +552,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
519
 
520
  if not aligned_face_paths:
521
  return ("No faces were extracted from the video.",
522
- None, None, None, None, None, None, None, None, None)
523
 
524
  progress(0.6, "Clustering faces")
525
  embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
@@ -534,7 +567,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
534
  original_fps, temp_dir, num_components, video_duration)
535
 
536
  progress(0.85, "Getting face samples")
537
- face_samples = get_random_face_samples(organized_faces_folder, output_folder, largest_cluster)
538
 
539
  progress(0.9, "Performing anomaly detection")
540
  feature_columns = [col for col in df.columns if
@@ -559,7 +592,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
559
 
560
  except Exception as e:
561
  print(f"Error details: {str(e)}")
562
- return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None, None
563
 
564
  progress(0.95, "Generating plots")
565
  try:
@@ -578,41 +611,36 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
578
  ['purple', 'green', 'orange', 'darkblue', 'gold', 'grey'])
579
  ]
580
  except Exception as e:
581
- return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None, None
582
 
583
  progress(1.0, "Preparing results")
584
  results = f"Number of persons/clusters detected: {num_clusters}\n\n"
585
  results += f"Breakdown of persons/clusters:\n"
586
  for cluster_id in range(num_clusters):
587
  results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
588
- results += f"\nAnomalies (Facial Features + Emotions):\n"
589
- results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
590
- zip(anomaly_scores_all[top_indices_all[1:]],
591
- df['Timecode'].iloc[top_indices_all[1:]].values)])
592
- results += f"\n\nAnomalies (Facial Features):\n"
593
- results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
594
- zip(anomaly_scores_comp[top_indices_comp[1:]],
595
- df['Timecode'].iloc[top_indices_comp[1:]].values)])
596
-
597
- for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
598
- results += f"\n\n{emotion.capitalize()} Anomalies:\n"
599
- results += "\n".join([f"{emotion_anomalies[emotion]['scores'][i]:.2f} at {df['Timecode'].iloc[i]}"
600
- for i in emotion_anomalies[emotion]['indices'] if i > 0])
601
 
602
  return (
603
  results,
604
  anomaly_plot_all,
605
  anomaly_plot_comp,
606
  *emotion_plots,
607
- face_samples
 
608
  )
609
 
 
 
 
 
 
 
610
  iface = gr.Interface(
611
  fn=process_video,
612
  inputs=[
613
  gr.Video(),
614
- gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
615
- gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
616
  gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
617
  ],
618
  outputs=[
@@ -625,12 +653,11 @@ iface = gr.Interface(
625
  gr.Plot(label="Happy Anomalies"),
626
  gr.Plot(label="Surprise Anomalies"),
627
  gr.Plot(label="Neutral Anomalies"),
628
- gr.Gallery(label="Random Samples of Detected Persons", columns=[5], rows=[2], height="auto")
629
- ],
630
  title="Facial Expressions Anomaly Detection",
631
  description="""
632
  This application detects anomalies in facial expressions and emotions from a video input.
633
- It identifies distinct persons in the video and provides sample faces for each, with 10 samples for the most frequent person.
634
 
635
  Adjust the parameters as needed:
636
  - Number of Components: Complexity of the facial expression model
@@ -642,5 +669,5 @@ iface = gr.Interface(
642
  allow_flagging="never"
643
  )
644
 
645
- if __name__ == "__main__":
646
- iface.launch()
 
35
  # Initialize models and other global variables
36
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
37
 
38
+ mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.975, 0.975, 0.975], min_face_size=100)
 
39
  model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
40
  mp_face_mesh = mp.solutions.face_mesh
41
  face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
 
155
 
156
  return embeddings_by_frame, emotions_by_frame, aligned_face_paths
157
 
 
158
  def cluster_faces(embeddings):
159
  if len(embeddings) < 2:
160
  print("Not enough faces for clustering. Assigning all to one cluster.")
 
226
 
227
  return df, largest_cluster
228
 
229
+ def determine_optimal_anomalies(anomaly_scores, z_threshold=3.5):
230
  mean = np.mean(anomaly_scores)
231
  std = np.std(anomaly_scores)
232
  threshold = mean + z_threshold * std
 
237
  h, m, s = map(float, timecode.split(':'))
238
  return h * 3600 + m * 60 + s
239
 
240
+ def group_similar_timecodes(timecodes, scores, threshold_seconds=10):
241
  grouped = []
242
  current_group = []
243
 
 
280
 
281
  print(f"X shape after reshaping: {X.shape}")
282
 
283
+ train_size = int(0.9 * X.shape[1])
284
  X_train, X_val = X[:, :train_size, :], X[:, train_size:, :]
285
 
286
  model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
 
364
  buf.seek(0)
365
  return buf
366
 
367
+ def embedding_anomaly_detection(embeddings, epochs=100, batch_size=64):
368
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
369
+ X = torch.FloatTensor(embeddings).to(device)
370
+ if X.dim() == 2:
371
+ X = X.unsqueeze(0)
372
+ elif X.dim() == 1:
373
+ X = X.unsqueeze(0).unsqueeze(2)
374
+
375
+ model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
376
+ criterion = nn.MSELoss()
377
+ optimizer = optim.Adam(model.parameters())
378
+
379
+ for epoch in range(epochs):
380
+ model.train()
381
+ optimizer.zero_grad()
382
+ output = model(X)
383
+ loss = criterion(output, X)
384
+ loss.backward()
385
+ optimizer.step()
386
+
387
+ model.eval()
388
+ with torch.no_grad():
389
+ reconstructed = model(X).squeeze(0).cpu().numpy()
390
+
391
+ mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
392
+ anomalies, top_indices = determine_optimal_anomalies(mse)
393
+
394
+ return anomalies, mse, top_indices
395
  def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
396
  plt.figure(figsize=(16, 8), dpi=300)
397
  fig, ax = plt.subplots(figsize=(16, 8))
 
399
  df['Seconds'] = df['Timecode'].apply(
400
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
401
 
402
+ # Filter out data points without faces
403
+ valid_indices = [i for i in range(len(anomaly_scores)) if i in df.index]
404
+ seconds = df['Seconds'].iloc[valid_indices].values
405
+ scores = anomaly_scores[valid_indices]
406
 
407
  ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
408
 
409
+ top_indices = [idx for idx in top_indices if idx in valid_indices]
410
+ ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
411
 
412
  # Calculate and plot baseline
413
+ non_anomalous_scores = np.delete(scores, top_indices)
414
  baseline = np.mean(non_anomalous_scores)
415
  ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
416
  ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
417
  verticalalignment='bottom', horizontalalignment='right', color='black')
418
 
419
  grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
420
+ scores[top_indices])
421
 
422
  for group in grouped_timecodes:
423
  max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
 
450
  df['Seconds'] = df['Timecode'].apply(
451
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
452
 
453
+ # Filter out data points without faces
454
+ valid_indices = [i for i in range(len(anomaly_scores)) if i in df.index]
455
+ seconds = df['Seconds'].iloc[valid_indices].values
456
+ scores = anomaly_scores[valid_indices]
457
 
458
  ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
459
 
460
+ top_indices = [idx for idx in top_indices if idx in valid_indices]
461
  ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
462
 
463
  # Calculate and plot baseline
 
495
  plt.close()
496
  return fig
497
 
498
+ def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster):
499
+ face_samples = {"most_frequent": [], "others": []}
500
+ for cluster_folder in sorted(os.listdir(organized_faces_folder)):
501
  if cluster_folder.startswith("person_"):
502
  person_folder = os.path.join(organized_faces_folder, cluster_folder)
503
+ face_files = sorted([f for f in os.listdir(person_folder) if f.endswith('.jpg')])
504
  if face_files:
505
+ cluster_id = int(cluster_folder.split('_')[1])
506
+ if cluster_id == largest_cluster:
507
+ for i, sample in enumerate(face_files):
508
+ face_path = os.path.join(person_folder, sample)
509
+ output_path = os.path.join(output_folder, f"face_sample_most_frequent_{i:04d}.jpg")
510
+ face_img = cv2.imread(face_path)
511
+ if face_img is not None:
512
+ small_face = cv2.resize(face_img, (160, 160))
513
+ cv2.imwrite(output_path, small_face)
514
+ face_samples["most_frequent"].append(output_path)
515
  else:
516
+ for i, sample in enumerate(face_files):
517
+ face_path = os.path.join(person_folder, sample)
518
+ output_path = os.path.join(output_folder, f"face_sample_other_{cluster_id:02d}_{i:04d}.jpg")
519
+ face_img = cv2.imread(face_path)
520
+ if face_img is not None:
521
+ small_face = cv2.resize(face_img, (160, 160))
522
+ cv2.imwrite(output_path, small_face)
523
+ face_samples["others"].append(output_path)
 
 
524
  return face_samples
 
525
  def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
526
  output_folder = "output"
527
  os.makedirs(output_folder, exist_ok=True)
 
552
 
553
  if not aligned_face_paths:
554
  return ("No faces were extracted from the video.",
555
+ None, None, None, None, None, None, None, None)
556
 
557
  progress(0.6, "Clustering faces")
558
  embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
 
567
  original_fps, temp_dir, num_components, video_duration)
568
 
569
  progress(0.85, "Getting face samples")
570
+ face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
571
 
572
  progress(0.9, "Performing anomaly detection")
573
  feature_columns = [col for col in df.columns if
 
592
 
593
  except Exception as e:
594
  print(f"Error details: {str(e)}")
595
+ return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None
596
 
597
  progress(0.95, "Generating plots")
598
  try:
 
611
  ['purple', 'green', 'orange', 'darkblue', 'gold', 'grey'])
612
  ]
613
  except Exception as e:
614
+ return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None
615
 
616
  progress(1.0, "Preparing results")
617
  results = f"Number of persons/clusters detected: {num_clusters}\n\n"
618
  results += f"Breakdown of persons/clusters:\n"
619
  for cluster_id in range(num_clusters):
620
  results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
621
+
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
  return (
624
  results,
625
  anomaly_plot_all,
626
  anomaly_plot_comp,
627
  *emotion_plots,
628
+ face_samples["most_frequent"],
629
+ face_samples["others"]
630
  )
631
 
632
+
633
+ gallery_outputs = [
634
+ gr.Gallery(label="Most Frequent Person Random Samples", columns=5, rows=2, height="auto"),
635
+ gr.Gallery(label="Other Persons Random Samples", columns=5, rows=1, height="auto")
636
+ ]
637
+
638
  iface = gr.Interface(
639
  fn=process_video,
640
  inputs=[
641
  gr.Video(),
642
+ gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of Components"),
643
+ gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Desired FPS"),
644
  gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
645
  ],
646
  outputs=[
 
653
  gr.Plot(label="Happy Anomalies"),
654
  gr.Plot(label="Surprise Anomalies"),
655
  gr.Plot(label="Neutral Anomalies"),
656
+ ] + gallery_outputs,
 
657
  title="Facial Expressions Anomaly Detection",
658
  description="""
659
  This application detects anomalies in facial expressions and emotions from a video input.
660
+ It identifies distinct persons in the video and provides sample faces for each, with multiple samples for the most frequent person.
661
 
662
  Adjust the parameters as needed:
663
  - Number of Components: Complexity of the facial expression model
 
669
  allow_flagging="never"
670
  )
671
 
672
+
673
+ iface.launch()