reab5555 committed on
Commit
86bd3cd
·
verified ·
1 Parent(s): d431c9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -44
app.py CHANGED
@@ -21,6 +21,7 @@ from PIL import Image
21
  import gradio as gr
22
  import tempfile
23
  import shutil
 
24
 
25
  # Suppress TensorFlow warnings
26
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -34,7 +35,7 @@ matplotlib.rcParams['savefig.dpi'] = 400
34
  # Initialize models and other global variables
35
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
36
 
37
- mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.999, 0.999, 0.999], min_face_size=100,
38
  selection_method='largest')
39
  model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
40
  mp_face_mesh = mp.solutions.face_mesh
@@ -155,31 +156,20 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
155
 
156
  return embeddings_by_frame, emotions_by_frame, aligned_face_paths
157
 
158
- def cluster_faces(face_images):
159
- if len(face_images) < 2:
160
- print("Not enough faces for clustering. Assigning all to one cluster.")
161
- return np.zeros(len(face_images), dtype=int)
162
-
163
- # Resize all images to a consistent size
164
- resized_faces = [cv2.resize(face, (224, 224)) for face in face_images]
165
 
166
- # Convert images to grayscale and flatten
167
- gray_faces = [cv2.cvtColor(face, cv2.COLOR_BGR2GRAY).flatten() for face in resized_faces]
168
-
169
- # Stack the flattened images
170
- X = np.stack(gray_faces)
171
 
172
- # Normalize the pixel values
173
- X = X / 255.0
174
 
175
- # Perform DBSCAN clustering
176
- dbscan = DBSCAN(eps=0.3, min_samples=10, metric='euclidean')
177
  clusters = dbscan.fit_predict(X)
178
 
179
- # If DBSCAN assigns all to noise (-1), consider it as one cluster
180
  if np.all(clusters == -1):
181
  print("DBSCAN assigned all to noise. Considering as one cluster.")
182
- return np.zeros(len(face_images), dtype=int)
183
 
184
  return clusters
185
 
@@ -245,12 +235,10 @@ def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
245
  anomalies = anomaly_scores > threshold
246
  return anomalies, np.where(anomalies)[0]
247
 
248
-
249
  def timecode_to_seconds(timecode):
250
  h, m, s = map(float, timecode.split(':'))
251
  return h * 3600 + m * 60 + s
252
 
253
-
254
  def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
255
  grouped = []
256
  current_group = []
@@ -372,32 +360,39 @@ def normalize_scores(scores):
372
  return np.full_like(scores, 100)
373
  return ((scores - min_score) / (max_score - min_score)) * 100
374
 
 
 
 
 
 
 
375
  def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
376
- plt.figure(figsize=(16, 8), dpi=500)
377
  fig, ax = plt.subplots(figsize=(16, 8))
378
 
379
  df['Seconds'] = df['Timecode'].apply(
380
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
381
 
382
- # Normalize scores
383
  normalized_scores = normalize_scores(anomaly_scores)
384
 
385
- # Omit the first data point
386
  seconds = df['Seconds'].values[1:]
387
  scores = normalized_scores[1:]
388
 
389
- # Create scatter plot
390
  ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
391
 
392
- # Highlight top anomalies (excluding the first data point)
393
  top_indices = [idx for idx in top_indices if idx > 0]
394
  ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
395
 
396
- # Group similar timecodes
 
 
 
 
 
 
397
  grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
398
  normalized_scores[top_indices])
399
 
400
- # Add timecode annotations for grouped timecodes
401
  for group in grouped_timecodes:
402
  max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
403
  timecode, score, idx = group[max_score_idx]
@@ -415,35 +410,39 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
415
 
416
  ax.set_xlabel('Time')
417
  ax.set_ylabel('Anomaly Score')
418
- ax.set_title(f'Anomaly Scores ({title})')
419
 
420
  ax.grid(True, linestyle='--', alpha=0.7)
421
  plt.tight_layout()
 
422
  return fig
423
 
424
  def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
425
- plt.figure(figsize=(16, 8), dpi=500)
426
  fig, ax = plt.subplots(figsize=(16, 8))
427
 
428
  df['Seconds'] = df['Timecode'].apply(
429
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
430
 
431
- # Omit the first data point
432
  seconds = df['Seconds'].values[1:]
433
  scores = anomaly_scores[1:]
434
 
435
- # Create scatter plot
436
  ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
437
 
438
- # Highlight top anomalies (excluding the first data point)
439
  top_indices = [idx for idx in top_indices if idx > 0]
440
  ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
441
 
442
- # Group similar timecodes
 
 
 
 
 
 
 
443
  grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
444
  anomaly_scores[top_indices])
445
 
446
- # Add timecode annotations for grouped timecodes
447
  for group in grouped_timecodes:
448
  max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
449
  timecode, score, idx = group[max_score_idx]
@@ -465,6 +464,7 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
465
 
466
  ax.grid(True, linestyle='--', alpha=0.7)
467
  plt.tight_layout()
 
468
  return fig
469
 
470
  def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
@@ -475,10 +475,8 @@ def get_random_face_samples(organized_faces_folder, output_folder, largest_clust
475
  face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
476
  if face_files:
477
  if int(cluster_folder.split('_')[1]) == largest_cluster:
478
- # Get 10 samples for the largest cluster
479
  samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
480
  else:
481
- # Get 1 sample for other clusters
482
  samples = [np.random.choice(face_files)]
483
 
484
  for i, sample in enumerate(samples):
@@ -491,7 +489,6 @@ def get_random_face_samples(organized_faces_folder, output_folder, largest_clust
491
  face_samples.append(output_path)
492
  return face_samples
493
 
494
-
495
  def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
496
  output_folder = "output"
497
  os.makedirs(output_folder, exist_ok=True)
@@ -525,8 +522,8 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
525
  None, None, None, None, None, None, None, None, None)
526
 
527
  progress(0.6, "Clustering faces")
528
- face_images = [cv2.imread(path) for path in aligned_face_paths]
529
- clusters = cluster_faces(face_images)
530
  num_clusters = len(set(clusters)) # Get the number of unique clusters
531
 
532
  progress(0.7, "Organizing faces")
@@ -548,11 +545,9 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
548
  anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
549
  X, feature_columns, batch_size=batch_size)
550
 
551
- # Normalize anomaly scores
552
  anomaly_scores_all = normalize_scores(anomaly_scores_all)
553
  anomaly_scores_comp = normalize_scores(anomaly_scores_comp)
554
 
555
- # Perform anomaly detection for each emotion using LSTM autoencoder
556
  emotion_anomalies = {}
557
  for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
558
  anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
@@ -568,7 +563,8 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
568
 
569
  progress(0.95, "Generating plots")
570
  try:
571
- anomaly_plot_all = plot_anomaly_scores(df, anomaly_scores_all, top_indices_all, "Facial Features + Emotions",
 
572
  df['Timecode'].iloc[top_indices_all].values)
573
  anomaly_plot_comp = plot_anomaly_scores(df, anomaly_scores_comp, top_indices_comp, "Facial Features",
574
  df['Timecode'].iloc[top_indices_comp].values)
@@ -611,7 +607,6 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
611
  face_samples
612
  )
613
 
614
-
615
  iface = gr.Interface(
616
  fn=process_video,
617
  inputs=[
@@ -641,6 +636,8 @@ iface = gr.Interface(
641
  - Number of Components: Complexity of the facial expression model
642
  - Desired FPS: Frames per second to analyze (lower for faster processing)
643
  - Batch Size: Affects processing speed and memory usage
 
 
644
  """,
645
  allow_flagging="never"
646
  )
 
21
  import gradio as gr
22
  import tempfile
23
  import shutil
24
+ import io
25
 
26
  # Suppress TensorFlow warnings
27
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
35
  # Initialize models and other global variables
36
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
37
 
38
+ mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.98, 0.98, 0.98], min_face_size=50,
39
  selection_method='largest')
40
  model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
41
  mp_face_mesh = mp.solutions.face_mesh
 
156
 
157
  return embeddings_by_frame, emotions_by_frame, aligned_face_paths
158
 
 
 
 
 
 
 
 
159
 
160
def cluster_faces(embeddings, eps=0.5, min_samples=5):
    """Group face embeddings into clusters with DBSCAN (cosine metric).

    Args:
        embeddings: Sequence of equally-shaped embedding vectors, one per
            face. They are stacked with ``np.stack`` before clustering.
        eps: Neighbourhood radius passed to ``DBSCAN``. Defaults to the
            previously hard-coded value of 0.5.
        min_samples: Minimum neighbourhood size passed to ``DBSCAN``.
            Defaults to the previously hard-coded value of 5.

    Returns:
        An integer array with one cluster label per embedding. When fewer
        than two embeddings are given, or when DBSCAN labels every sample
        as noise (-1), every face is assigned to cluster 0. Otherwise the
        labels are returned exactly as DBSCAN produced them, so individual
        noise samples may still carry the label -1.
    """
    n_faces = len(embeddings)

    # DBSCAN is pointless for zero or one sample; short-circuit to one cluster.
    if n_faces < 2:
        print("Not enough faces for clustering. Assigning all to one cluster.")
        return np.zeros(n_faces, dtype=int)

    X = np.stack(embeddings)

    # NOTE(review): DBSCAN is presumably sklearn.cluster.DBSCAN; the import is
    # outside the visible part of the file -- confirm.
    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine')
    clusters = dbscan.fit_predict(X)

    # An all-noise result is treated as a single cluster rather than "no faces".
    if np.all(clusters == -1):
        print("DBSCAN assigned all to noise. Considering as one cluster.")
        return np.zeros(n_faces, dtype=int)

    return clusters
175
 
 
235
  anomalies = anomaly_scores > threshold
236
  return anomalies, np.where(anomalies)[0]
237
 
 
238
def timecode_to_seconds(timecode):
    """Convert a colon-separated timecode string to a number of seconds.

    Accepts ``HH:MM:SS``, ``MM:SS`` or ``SS``; every field is parsed with
    ``float`` so fractional values are allowed. Missing leading fields are
    treated as zero.

    Args:
        timecode: Timecode string such as ``"01:02:03.5"``.

    Returns:
        The timecode expressed in seconds, as a float.

    Raises:
        ValueError: If a field is not a valid number or there are more than
            three colon-separated fields.
    """
    fields = [float(part) for part in timecode.split(':')]
    if len(fields) > 3:
        raise ValueError(f"too many fields in timecode: {timecode!r}")

    # Left-pad with zeros so the arithmetic below is identical to the
    # three-field case regardless of how many fields were supplied.
    h, m, s = [0.0] * (3 - len(fields)) + fields
    return h * 3600 + m * 60 + s
241
 
 
242
  def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
243
  grouped = []
244
  current_group = []
 
360
  return np.full_like(scores, 100)
361
  return ((scores - min_score) / (max_score - min_score)) * 100
362
 
363
def plot_to_image(fig, dpi=300):
    """Render a figure to PNG in memory and return the buffer.

    Args:
        fig: Object exposing ``savefig(file, format=..., dpi=...,
            bbox_inches=...)`` -- presumably a matplotlib ``Figure``.
        dpi: Resolution forwarded to ``savefig``. Defaults to 300, the value
            that used to be hard-coded.

    Returns:
        An ``io.BytesIO`` holding the PNG data, rewound to position 0 so the
        caller can read it immediately.
    """
    buf = io.BytesIO()
    fig.savefig(buf, format='png', dpi=dpi, bbox_inches='tight')
    # Rewind: savefig leaves the stream position at the end of the data.
    buf.seek(0)
    return buf
368
+
369
  def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
370
+ plt.figure(figsize=(16, 8), dpi=300)
371
  fig, ax = plt.subplots(figsize=(16, 8))
372
 
373
  df['Seconds'] = df['Timecode'].apply(
374
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
375
 
 
376
  normalized_scores = normalize_scores(anomaly_scores)
377
 
 
378
  seconds = df['Seconds'].values[1:]
379
  scores = normalized_scores[1:]
380
 
 
381
  ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
382
 
 
383
  top_indices = [idx for idx in top_indices if idx > 0]
384
  ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
385
 
386
+ # Calculate and plot baseline
387
+ non_anomalous_scores = np.delete(normalized_scores, top_indices)
388
+ baseline = np.mean(non_anomalous_scores)
389
+ ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
390
+ ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
391
+ verticalalignment='bottom', horizontalalignment='right', color='black')
392
+
393
  grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
394
  normalized_scores[top_indices])
395
 
 
396
  for group in grouped_timecodes:
397
  max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
398
  timecode, score, idx = group[max_score_idx]
 
410
 
411
  ax.set_xlabel('Time')
412
  ax.set_ylabel('Anomaly Score')
413
+ ax.set_title(title)
414
 
415
  ax.grid(True, linestyle='--', alpha=0.7)
416
  plt.tight_layout()
417
+ plt.close()
418
  return fig
419
 
420
  def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
421
+ plt.figure(figsize=(16, 8), dpi=300)
422
  fig, ax = plt.subplots(figsize=(16, 8))
423
 
424
  df['Seconds'] = df['Timecode'].apply(
425
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
426
 
 
427
  seconds = df['Seconds'].values[1:]
428
  scores = anomaly_scores[1:]
429
 
 
430
  ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
431
 
 
432
  top_indices = [idx for idx in top_indices if idx > 0]
433
  ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
434
 
435
+ # Calculate and plot baseline
436
+ non_anomalous_scores = np.delete(anomaly_scores, top_indices)
437
+ baseline = np.mean(non_anomalous_scores)
438
+ ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
439
+ ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
440
+ verticalalignment='bottom', horizontalalignment='right', color='black')
441
+
442
+
443
  grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
444
  anomaly_scores[top_indices])
445
 
 
446
  for group in grouped_timecodes:
447
  max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
448
  timecode, score, idx = group[max_score_idx]
 
464
 
465
  ax.grid(True, linestyle='--', alpha=0.7)
466
  plt.tight_layout()
467
+ plt.close()
468
  return fig
469
 
470
  def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
 
475
  face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
476
  if face_files:
477
  if int(cluster_folder.split('_')[1]) == largest_cluster:
 
478
  samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
479
  else:
 
480
  samples = [np.random.choice(face_files)]
481
 
482
  for i, sample in enumerate(samples):
 
489
  face_samples.append(output_path)
490
  return face_samples
491
 
 
492
  def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
493
  output_folder = "output"
494
  os.makedirs(output_folder, exist_ok=True)
 
522
  None, None, None, None, None, None, None, None, None)
523
 
524
  progress(0.6, "Clustering faces")
525
+ embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
526
+ clusters = cluster_faces(embeddings)
527
  num_clusters = len(set(clusters)) # Get the number of unique clusters
528
 
529
  progress(0.7, "Organizing faces")
 
545
  anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
546
  X, feature_columns, batch_size=batch_size)
547
 
 
548
  anomaly_scores_all = normalize_scores(anomaly_scores_all)
549
  anomaly_scores_comp = normalize_scores(anomaly_scores_comp)
550
 
 
551
  emotion_anomalies = {}
552
  for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
553
  anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
 
563
 
564
  progress(0.95, "Generating plots")
565
  try:
566
+ anomaly_plot_all = plot_anomaly_scores(df, anomaly_scores_all, top_indices_all,
567
+ "Facial Features + Emotions",
568
  df['Timecode'].iloc[top_indices_all].values)
569
  anomaly_plot_comp = plot_anomaly_scores(df, anomaly_scores_comp, top_indices_comp, "Facial Features",
570
  df['Timecode'].iloc[top_indices_comp].values)
 
607
  face_samples
608
  )
609
 
 
610
  iface = gr.Interface(
611
  fn=process_video,
612
  inputs=[
 
636
  - Number of Components: Complexity of the facial expression model
637
  - Desired FPS: Frames per second to analyze (lower for faster processing)
638
  - Batch Size: Affects processing speed and memory usage
639
+
640
+ Click on any graph to enlarge it.
641
  """,
642
  allow_flagging="never"
643
  )