reab5555 committed
Commit 981f52f · verified · 1 Parent(s): 01f0185

Update app.py

Files changed (1)
  1. app.py +138 -112
app.py CHANGED
@@ -8,8 +8,10 @@ import seaborn as sns
 from facenet_pytorch import InceptionResnetV1, MTCNN
 import mediapipe as mp
 from fer import FER
-from sklearn.cluster import KMeans
+from scipy import interpolate
+from sklearn.cluster import DBSCAN, KMeans
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.metrics import silhouette_score
 import umap
 import pandas as pd
 import matplotlib
@@ -20,7 +22,6 @@ import gradio as gr
 import tempfile
 import shutil
 
-
 # Suppress TensorFlow warnings
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import tensorflow as tf
@@ -31,7 +32,7 @@ matplotlib.rcParams['figure.dpi'] = 400
 matplotlib.rcParams['savefig.dpi'] = 400
 
 # Initialize models and other global variables
-device = 'cuda'
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.999, 0.999, 0.999], min_face_size=100,
               selection_method='largest')
@@ -40,7 +41,6 @@ mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
 emotion_detector = FER(mtcnn=False)
 
-
 def frame_to_timecode(frame_num, total_frames, duration):
     total_seconds = (frame_num / total_frames) * duration
     hours = int(total_seconds // 3600)
@@ -49,7 +49,6 @@ def frame_to_timecode(frame_num, total_frames, duration):
     milliseconds = int((total_seconds - int(total_seconds)) * 1000)
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
 
-
 def get_face_embedding_and_emotion(face_img):
     face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
     face_tensor = (face_tensor - 0.5) / 0.5
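A quick check of the timecode math above (an illustration, not part of the commit): frame 150 of 300 in a 60-second clip lands exactly at the 30-second mark.

    # Illustration only, using frame_to_timecode as defined in app.py.
    print(frame_to_timecode(150, 300, 60.0))   # -> "00:00:30.000"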
@@ -65,7 +64,6 @@ def get_face_embedding_and_emotion(face_img):
 
     return embedding.cpu().numpy().flatten(), emotion_dict
 
-
 def alignFace(img):
     img_raw = img.copy()
     results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -91,13 +89,9 @@ def alignFace(img):
     new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
     return new_img
 
-
 def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
     os.makedirs(output_folder, exist_ok=True)
-
-    # Load the video clip
     clip = VideoFileClip(video_path)
-
     original_fps = clip.fps
     duration = clip.duration
     total_frames = int(duration * original_fps)
@@ -106,23 +100,15 @@ def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
 
     frame_count = 0
     for t in np.arange(0, duration, step / original_fps):
-        # Get the frame at time t
         frame = clip.get_frame(t)
-
-        # Convert the frame to PIL Image and save it
         img = Image.fromarray(frame)
         img.save(os.path.join(output_folder, f"frame_{frame_count:04d}.jpg"))
-
        frame_count += 1
-
-        # Report progress
        if progress_callback:
            progress = min(100, (frame_count / total_frames_to_extract) * 100)
            progress_callback(progress, f"Extracting frame")
-
        if frame_count >= total_frames_to_extract:
            break
-
    clip.close()
    return frame_count, original_fps
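For orientation (not part of the commit), the sampling arithmetic above: step / original_fps is the interval in seconds between saved frames. Assuming step is original_fps / desired_fps (its actual definition falls in lines this hunk elides), a 2-second clip at 30 fps sampled at 10 fps yields 20 frames.

    import numpy as np

    original_fps, desired_fps, duration = 30.0, 10.0, 2.0
    step = original_fps / desired_fps        # assumed; the real definition is elided above
    times = np.arange(0, duration, step / original_fps)
    print(len(times), times[:3])             # 20 [0.  0.1 0.2]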
@@ -145,7 +131,6 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
             batch_nums.append(frame_num)
 
         if batch_frames:
-            # Detect faces in batch
             batch_boxes, batch_probs = mtcnn.detect(batch_frames)
 
             for j, (frame, frame_num, boxes, probs) in enumerate(
@@ -173,13 +158,30 @@ def cluster_embeddings(embeddings):
     if len(embeddings) < 2:
         print("Not enough embeddings for clustering. Assigning all to one cluster.")
         return np.zeros(len(embeddings), dtype=int)
-    n_clusters = min(3, len(embeddings))  # Use at most 3 clusters
+
     scaler = StandardScaler()
     embeddings_scaled = scaler.fit_transform(embeddings)
-    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
-    clusters = kmeans.fit_predict(embeddings_scaled)
-    return clusters
 
+    # Use DBSCAN for adaptive clustering
+    dbscan = DBSCAN(eps=0.5, min_samples=5)  # Adjust these parameters as needed
+    clusters = dbscan.fit_predict(embeddings_scaled)
+
+    # If DBSCAN couldn't find meaningful clusters, fall back to KMeans
+    if len(set(clusters)) == 1:
+        best_n_clusters = 1
+        best_score = -1
+        for n_clusters in range(2, min(5, len(embeddings))):
+            kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
+            labels = kmeans.fit_predict(embeddings_scaled)
+            score = silhouette_score(embeddings_scaled, labels)
+            if score > best_score:
+                best_score = score
+                best_n_clusters = n_clusters
+
+        kmeans = KMeans(n_clusters=best_n_clusters, random_state=42, n_init=10)
+        clusters = kmeans.fit_predict(embeddings_scaled)
+
+    return clusters
 
 def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
     for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
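The new clustering strategy above, reduced to a self-contained sketch on toy embeddings (all names below are local to the sketch, not part of app.py): try DBSCAN first, and only sweep KMeans with silhouette scoring when DBSCAN collapses everything into a single label.

    import numpy as np
    from sklearn.cluster import DBSCAN, KMeans
    from sklearn.metrics import silhouette_score
    from sklearn.preprocessing import StandardScaler

    rng = np.random.default_rng(42)
    emb = np.vstack([rng.normal(0, 0.1, (20, 8)),    # toy embeddings: person A
                     rng.normal(3, 0.1, (20, 8))])   # toy embeddings: person B
    scaled = StandardScaler().fit_transform(emb)

    labels = DBSCAN(eps=0.5, min_samples=5).fit_predict(scaled)
    if len(set(labels)) == 1:  # DBSCAN found no structure; fall back to KMeans
        best_k = max(range(2, 5), key=lambda k: silhouette_score(
            scaled, KMeans(n_clusters=k, random_state=42, n_init=10).fit_predict(scaled)))
        labels = KMeans(n_clusters=best_k, random_state=42, n_init=10).fit_predict(scaled)
    print(sorted(set(labels)))  # -> [0, 1]: two people found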
@@ -189,7 +191,6 @@ def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
         dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
         shutil.copy(src, dst)
 
-
 def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
                             num_components, video_duration):
     emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
@@ -237,14 +238,12 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
 
     return df, largest_cluster
 
-
 class LSTMAutoencoder(nn.Module):
     def __init__(self, input_size, hidden_size=64, num_layers=2):
         super(LSTMAutoencoder, self).__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
         self.num_layers = num_layers
-
         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
         self.fc = nn.Linear(hidden_size, input_size)
 
@@ -253,13 +252,9 @@ class LSTMAutoencoder(nn.Module):
         out = self.fc(outputs)
         return out
 
-
 def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
-    device = 'cuda'
-
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
     X = torch.FloatTensor(X).to(device)
-
-    # Ensure X is 3D (batch, sequence, features)
     if X.dim() == 2:
         X = X.unsqueeze(0)
     elif X.dim() == 1:
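Shape handling, illustrated (not part of the commit): the LSTM autoencoder expects (batch, sequence, features), so the whole video becomes one batch element whose sequence axis is time.

    import torch

    X = torch.randn(120, 17)   # e.g. 120 frames x 17 feature columns
    if X.dim() == 2:
        X = X.unsqueeze(0)     # -> (1, 120, 17): a single sequence over the video
    print(X.shape)             # torch.Size([1, 120, 17])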
@@ -293,13 +288,11 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
     with torch.no_grad():
         reconstructed = model(X).squeeze(0).cpu().numpy()
 
-    # Compute anomalies for all features
     mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
     top_indices_all = mse_all.argsort()[-num_anomalies:][::-1]
     anomalies_all = np.zeros(len(mse_all), dtype=bool)
     anomalies_all[top_indices_all] = True
 
-    # Compute anomalies for components only
     component_columns = [col for col in feature_columns if col.startswith('Comp')]
     component_indices = [feature_columns.index(col) for col in component_columns]
 
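The top-k selection above, on a toy error vector (illustration only): argsort ascending, keep the last num_anomalies entries, reverse so the worst-reconstructed frame comes first.

    import numpy as np

    mse = np.array([0.10, 0.90, 0.20, 0.80, 0.30])
    top = mse.argsort()[-2:][::-1]   # the two largest errors, largest first
    print(top)                       # [1 3]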
@@ -307,7 +300,7 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
         mse_comp = np.mean(
             np.power(X.squeeze(0).cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
     else:
-        mse_comp = mse_all  # If no components, use all features
+        mse_comp = mse_all
 
     top_indices_comp = mse_comp.argsort()[-num_anomalies:][::-1]
     anomalies_comp = np.zeros(len(mse_comp), dtype=bool)
@@ -317,98 +310,130 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
             anomalies_comp, mse_comp, top_indices_comp,
             model)
 
+from scipy import interpolate
+
+def plot_with_segments(ax, df_filtered, y_column, color):
+    segments = []
+    current_segment = []
+    for i, (time, score) in enumerate(zip(df_filtered['Seconds'], df_filtered[y_column])):
+        if i > 0 and time - df_filtered['Seconds'].iloc[i-1] > 1:  # Gap of more than 1 second
+            if current_segment:
+                segments.append(current_segment)
+                current_segment = []
+        current_segment.append((time, score))
+    if current_segment:
+        segments.append(current_segment)
+
+    for segment in segments:
+        times, scores = zip(*segment)
+        if len(times) > 3:
+            try:
+                # Use scipy's interpolate to create a smooth curve
+                f = interpolate.interp1d(times, scores, kind='cubic')
+                smooth_times = np.linspace(min(times), max(times), num=200)
+                smooth_scores = f(smooth_times)
+                ax.plot(smooth_times, smooth_scores, color=color, linewidth=1.5)
+            except ValueError:
+                # Fall back to linear interpolation if cubic fails
+                f = interpolate.interp1d(times, scores, kind='linear')
+                smooth_times = np.linspace(min(times), max(times), num=200)
+                smooth_scores = f(smooth_times)
+                ax.plot(smooth_times, smooth_scores, color=color, linewidth=1.5)
+        else:
+            # For very short segments, just plot the points
+            ax.plot(times, scores, color=color, linewidth=1.5)
 
-def plot_emotion(df, emotion, num_anomalies, color):
-    plt.figure(figsize=(16, 8), dpi=400)  # Increase DPI for higher quality
+def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
+    plt.figure(figsize=(16, 8), dpi=400)
     fig, ax = plt.subplots(figsize=(16, 8))
 
-    # Convert timecodes to seconds for proper plotting
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
 
-    # Create a DataFrame for seaborn
-    plot_df = pd.DataFrame({
-        'Seconds': df['Seconds'],
-        'Emotion Score': df[emotion]
-    })
+    # Filter out rows with no data
+    mask = ~np.isnan(anomaly_scores)
+    df_filtered = df[mask].copy()
+    df_filtered['anomaly_scores'] = anomaly_scores[mask]
 
-    # Plot using seaborn
-    sns.lineplot(x='Seconds', y='Emotion Score', data=plot_df, ax=ax, color=color)
+    if df_filtered.empty:
+        ax.text(0.5, 0.5, "No data available", ha='center', va='center')
+    else:
+        plot_with_segments(ax, df_filtered, 'anomaly_scores', 'blue')
 
-    # Highlight top anomalies
-    top_indices = np.argsort(df[emotion].values)[-num_anomalies:][::-1]
-    ax.scatter(df['Seconds'].iloc[top_indices], df[emotion].iloc[top_indices], color='red', s=50, zorder=5)
+        # Highlight top anomalies
+        top_indices_filtered = [i for i in top_indices if i in df_filtered.index]
+        ax.scatter(df_filtered['Seconds'].iloc[top_indices_filtered],
+                   df_filtered['anomaly_scores'].iloc[top_indices_filtered],
+                   color='red', s=100, zorder=5)
 
-    # Set x-axis
-    max_seconds = df['Seconds'].max()
+    max_seconds = df['Seconds'].max()  # Use the full range for x-axis
     ax.set_xlim(0, max_seconds)
-    num_ticks = 80  # Reduce number of ticks for emotion graphs
+    num_ticks = 80
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
-    ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()], rotation=90, ha='right')
+    ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
+                       rotation=90, ha='center', va='top')
 
     ax.set_xlabel('Time')
-    ax.set_ylabel(f'{emotion.capitalize()} Score')
-    ax.set_title(f'{emotion.capitalize()} Scores Over Time (Top {num_anomalies} in Red)')
+    ax.set_ylabel('Anomaly Score')
+    ax.set_title(f'Anomaly Scores Over Time ({title})')
 
-    # Add grid
     ax.grid(True, linestyle='--', alpha=0.7)
-
     plt.tight_layout()
     return fig
 
-def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
-    plt.figure(figsize=(16, 8), dpi=400)  # Increase DPI for higher quality
+def plot_emotion(df, emotion, num_anomalies, color):
+    plt.figure(figsize=(16, 8), dpi=400)
     fig, ax = plt.subplots(figsize=(16, 8))
 
-    # Convert timecodes to seconds for proper plotting
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
 
-    # Create a DataFrame for seaborn
-    plot_df = pd.DataFrame({
-        'Seconds': df['Seconds'],
-        'Anomaly Score': anomaly_scores
-    })
+    # Filter out rows with no data
+    mask = ~np.isnan(df[emotion])
+    df_filtered = df[mask]
 
-    # Plot using seaborn
-    sns.lineplot(x='Seconds', y='Anomaly Score', data=plot_df, ax=ax)
+    if df_filtered.empty:
+        ax.text(0.5, 0.5, "No data available", ha='center', va='center')
+    else:
+        plot_with_segments(ax, df_filtered, emotion, color)
 
-    # Highlight top anomalies
-    ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
+        # Highlight top anomalies
+        top_indices = np.argsort(df_filtered[emotion].values)[-num_anomalies:][::-1]
+        ax.scatter(df_filtered['Seconds'].iloc[top_indices],
+                   df_filtered[emotion].iloc[top_indices],
+                   color='red', s=100, zorder=5)
 
-    # Set x-axis
-    max_seconds = df['Seconds'].max()
+    max_seconds = df['Seconds'].max()  # Use the full range for x-axis
     ax.set_xlim(0, max_seconds)
-    num_ticks = 80  # Increase number of ticks for anomaly score graphs
+    num_ticks = 80
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
-    ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()], rotation=90, ha='right')
+    ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
+                       rotation=90, ha='center', va='top')
 
     ax.set_xlabel('Time')
-    ax.set_ylabel('Anomaly Score')
-    ax.set_title(f'Anomaly Scores Over Time ({title})')
+    ax.set_ylabel(f'{emotion.capitalize()} Score')
+    ax.set_title(f'{emotion.capitalize()} Scores Over Time (Top {num_anomalies} in Red)')
 
-    # Add grid
     ax.grid(True, linestyle='--', alpha=0.7)
-
     plt.tight_layout()
     return fig
 
-def get_random_face_sample(organized_faces_folder, largest_cluster, output_folder):
-    person_folder = os.path.join(organized_faces_folder, f"person_{largest_cluster}")
-    face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
-    if face_files:
-        random_face = np.random.choice(face_files)
-        face_path = os.path.join(person_folder, random_face)
-        output_path = os.path.join(output_folder, "random_face_sample.jpg")
-
-        # Read the image and resize it to be smaller
-        face_img = cv2.imread(face_path)
-        small_face = cv2.resize(face_img, (160, 160))  # Resize to NxN pixels
-        cv2.imwrite(output_path, small_face)
-
-        return output_path
-    return None
-
+def get_random_face_samples(organized_faces_folder, output_folder):
+    face_samples = {}
+    for cluster_folder in os.listdir(organized_faces_folder):
+        if cluster_folder.startswith("person_"):
+            cluster_id = int(cluster_folder.split("_")[1])
+            person_folder = os.path.join(organized_faces_folder, cluster_folder)
+            face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
+            if face_files:
+                random_face = np.random.choice(face_files)
+                face_path = os.path.join(person_folder, random_face)
+                output_path = os.path.join(output_folder, f"face_sample_person_{cluster_id}.jpg")
+                face_img = cv2.imread(face_path)
+                small_face = cv2.resize(face_img, (160, 160))
+                cv2.imwrite(output_path, small_face)
+                face_samples[cluster_id] = output_path
+    return face_samples
 
 def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
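The gap-splitting rule inside plot_with_segments, as a toy run (names local to this sketch): samples more than one second apart open a new segment, so no line is drawn across stretches where the face was not detected.

    times = [0.0, 0.5, 1.0, 5.0, 5.5]        # seconds; detection lost from 1.0 to 5.0
    segments, current = [], [times[0]]
    for prev, t in zip(times, times[1:]):
        if t - prev > 1:                     # same 1-second threshold as above
            segments.append(current)
            current = []
        current.append(t)
    segments.append(current)
    print(segments)                          # [[0.0, 0.5, 1.0], [5.0, 5.5]]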
@@ -420,7 +445,6 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
     os.makedirs(aligned_faces_folder, exist_ok=True)
     os.makedirs(organized_faces_folder, exist_ok=True)
 
-
     clip = VideoFileClip(video_path)
     video_duration = clip.duration
     clip.close()
@@ -434,21 +458,25 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
     frame_count, original_fps = extract_frames(video_path, frames_folder, desired_fps, extraction_progress)
 
     progress(1, "Frame extraction complete")
-
     progress(0.3, "Processing frames")
     embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count,
                                                             progress, batch_size)
 
     if not embeddings_by_frame:
-        return "No faces were extracted from the video.", None, None, None, None, None, None
+        return ("No faces were extracted from the video.",
+                None, None, None, None, None, None, None, None, None)
 
     progress(0.6, "Clustering embeddings")
     embeddings = list(embeddings_by_frame.values())
     clusters = cluster_embeddings(embeddings)
+    num_clusters = len(set(clusters))  # Get the number of unique clusters
 
     progress(0.7, "Organizing faces")
     organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
 
+    progress(0.75, "Getting face samples")
+    face_samples = get_random_face_samples(organized_faces_folder, output_folder)
+
     progress(0.8, "Saving person data")
     df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
                                                   original_fps, temp_dir, num_components, video_duration)
@@ -464,9 +492,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
             X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
     except Exception as e:
         print(f"Error details: {str(e)}")
-        print(f"X shape: {X.shape}")
-        print(f"X dtype: {X.dtype}")
-        return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None
+        return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None, None
 
     progress(0.95, "Generating plots")
     try:
@@ -484,7 +510,10 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
         return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None, None
 
     progress(1.0, "Preparing results")
-    results = f"Top {num_anomalies} anomalies (All Features):\n"
+    results = f"Number of persons detected: {num_clusters}\n\n"
+    for cluster_id in range(num_clusters):
+        results += f"Person {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
+    results += f"\nTop {num_anomalies} anomalies (All Features):\n"
     results += "\n".join([f"{score:.4f} at {timecode}" for score, timecode in
                           zip(anomaly_scores_all[top_indices_all], df['Timecode'].iloc[top_indices_all].values)])
     results += f"\n\nTop {num_anomalies} anomalies (Components Only):\n"
@@ -496,52 +525,49 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
         results += f"\n\nTop {num_anomalies} {emotion.capitalize()} Scores:\n"
         results += "\n".join([f"{df[emotion].iloc[i]:.4f} at {df['Timecode'].iloc[i]}" for i in top_indices])
 
-    # Get a random face sample
-    face_sample = get_random_face_sample(organized_faces_folder, largest_cluster, output_folder)
-
     return (
         results,
         anomaly_plot_all,
         anomaly_plot_comp,
         *emotion_plots,
-        face_sample
+        *[face_samples.get(i, None) for i in range(num_clusters)]
     )
 
 
-# Gradio interface
 iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
-        gr.Slider(minimum=1, maximum=20, step=1, value=5, label="Number of Anomalies"),
+        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Anomalies"),
         gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
-        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Desired FPS"),
-        gr.Slider(minimum=1, maximum=64, step=1, value=8, label="Batch Size")
+        gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
+        gr.Slider(minimum=1, maximum=32, step=4, value=8, label="Batch Size")
     ],
     outputs=[
         gr.Textbox(label="Anomaly Detection Results"),
-        gr.Plot(label="Anomaly Scores (All Features)"),
-        gr.Plot(label="Anomaly Scores (Components Only)"),
+        gr.Plot(label="Anomaly Scores (Facial Features + Emotions)"),
+        gr.Plot(label="Anomaly Scores (Facial Features)"),
         gr.Plot(label="Fear Anomalies"),
        gr.Plot(label="Sad Anomalies"),
        gr.Plot(label="Angry Anomalies"),
        gr.Plot(label="Happy Anomalies"),
        gr.Plot(label="Surprise Anomalies"),
        gr.Plot(label="Neutral Anomalies"),
-        gr.Image(type="filepath", label="Random Face Sample of Most Frequent Person"),
+        gr.Gallery(label="Detected Persons", columns=[2], rows=[1], height="auto")
    ],
    title="Facial Expressions Anomaly Detection",
    description="""
    This application detects anomalies in facial expressions and emotions from a video input.
-    It focuses on the most frequently appearing person in the video for analysis.
+    It identifies distinct persons in the video and provides a sample face for each.
 
    Adjust the parameters as needed:
    - Number of Anomalies: How many top anomalies or high intensities to highlight
    - Number of Components: Complexity of the facial expression model
    - Desired FPS: Frames per second to analyze (lower for faster processing)
    - Batch Size: Affects processing speed and memory usage
-    """
+    """,
+    allow_flagging="never"
)

if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
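Run with python app.py; Gradio serves the interface at its default local URL. A temporary public link is also a standard launch option (shown as an aside, not used in this commit):

    iface.launch(share=True)   # optional: returns a temporary public URL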
 