reab5555 committed
Commit 730bdf9 · verified · 1 Parent(s): a75a127

Update app.py

Files changed (1)
  1. app.py +416 -159
app.py CHANGED
@@ -8,32 +8,38 @@ import torch.optim as optim
8
  from facenet_pytorch import InceptionResnetV1, MTCNN
9
  import tensorflow as tf
10
  import mediapipe as mp
11
- from fer import FER
12
  from sklearn.cluster import DBSCAN
13
  from sklearn.preprocessing import StandardScaler, MinMaxScaler
14
  import pandas as pd
15
  import matplotlib
16
  import matplotlib.pyplot as plt
 
17
  from matplotlib.patches import Rectangle
18
  from moviepy.editor import VideoFileClip
19
- from PIL import Image
20
  import gradio as gr
21
  import tempfile
22
  import shutil
23
- import copy
24
  import time
25
 
26
- matplotlib.rcParams['figure.dpi'] = 500
27
- matplotlib.rcParams['savefig.dpi'] = 500
 
28
 
29
  # Initialize models and other global variables
30
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
31
 
32
- mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.95, 0.95, 0.95], min_face_size=80)
33
  model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
 
34
  mp_face_mesh = mp.solutions.face_mesh
35
- face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
36
- emotion_detector = FER(mtcnn=False)
 
 
 
37
 
38
  def frame_to_timecode(frame_num, total_frames, duration):
39
  total_seconds = (frame_num / total_frames) * duration
@@ -53,20 +59,13 @@ def timecode_to_seconds(timecode):
53
  h, m, s = map(int, timecode.split(':'))
54
  return h * 3600 + m * 60 + s
55
 
56
- def get_face_embedding_and_emotion(face_img):
57
  face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
58
  face_tensor = (face_tensor - 0.5) / 0.5
59
  face_tensor = face_tensor.to(device)
60
  with torch.no_grad():
61
  embedding = model(face_tensor)
62
-
63
- emotions = emotion_detector.detect_emotions(face_img)
64
- if emotions:
65
- emotion_dict = emotions[0]['emotions']
66
- else:
67
- emotion_dict = {e: 0 for e in ['angry', 'disgust', 'fear', 'sad', 'happy']}
68
-
69
- return embedding.cpu().numpy().flatten(), emotion_dict
70
 
71
  def alignFace(img):
72
  img_raw = img.copy()
@@ -93,6 +92,53 @@ def alignFace(img):
93
  new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
94
  return new_img
95
 
 
 
96
  def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
97
  os.makedirs(output_folder, exist_ok=True)
98
  clip = VideoFileClip(video_path)
@@ -130,9 +176,12 @@ def is_frontal_face(landmarks, threshold=40):
130
  angle_degrees = math.degrees(angle)
131
  return abs(180 - angle_degrees) < threshold
132
 
 
133
  def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
134
  embeddings_by_frame = {}
135
  emotions_by_frame = {}
 
 
136
  aligned_face_paths = []
137
  frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
138
 
@@ -154,6 +203,12 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
154
 
155
  for j, (frame, frame_num, boxes, probs) in enumerate(
156
  zip(batch_frames, batch_nums, batch_boxes, batch_probs)):
 
157
  if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
158
  x1, y1, x2, y2 = [int(b) for b in boxes[0]]
159
  face = frame[y1:y2, x1:x2]
@@ -166,14 +221,14 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
166
  output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
167
  cv2.imwrite(output_path, aligned_face_resized)
168
  aligned_face_paths.append(output_path)
169
- embedding, emotion = get_face_embedding_and_emotion(aligned_face_resized)
170
  embeddings_by_frame[frame_num] = embedding
171
- emotions_by_frame[frame_num] = emotion
172
 
173
  progress((i + len(batch_files)) / len(frame_files),
174
  f"Processing frames {i + 1} to {min(i + len(batch_files), len(frame_files))} of {len(frame_files)}")
175
 
176
- return embeddings_by_frame, emotions_by_frame, aligned_face_paths
 
177
 
178
  def cluster_faces(embeddings):
179
  if len(embeddings) < 2:
@@ -198,20 +253,19 @@ def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder
198
  dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
199
  shutil.copy(src, dst)
200
 
201
- def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
202
- emotions = ['angry', 'disgust', 'fear', 'sad', 'happy']
203
  person_data = {}
204
 
205
- for (frame_num, embedding), (_, emotion_dict), cluster in zip(embeddings_by_frame.items(), emotions_by_frame.items(), clusters):
206
  if cluster not in person_data:
207
  person_data[cluster] = []
208
- person_data[cluster].append((frame_num, embedding, {e: emotion_dict[e] for e in emotions}))
209
 
210
  largest_cluster = max(person_data, key=lambda k: len(person_data[k]))
211
 
212
  data = person_data[largest_cluster]
213
  data.sort(key=lambda x: x[0])
214
- frames, embeddings, emotions_data = zip(*data)
215
 
216
  embeddings_array = np.array(embeddings)
217
  np.save(os.path.join(output_folder, 'face_embeddings.npy'), embeddings_array)
@@ -228,9 +282,6 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
228
  for i in range(len(embeddings[0])):
229
  df_data[f'Raw_Embedding_{i}'] = [embedding[i] for embedding in embeddings]
230
 
231
- for emotion in emotions:
232
- df_data[emotion] = [e[emotion] for e in emotions_data]
233
-
234
  df = pd.DataFrame(df_data)
235
 
236
  return df, largest_cluster
@@ -270,62 +321,53 @@ def determine_anomalies(mse_values, threshold):
270
  anomalies = mse_values > (mean + threshold * std)
271
  return anomalies
272
 
273
- def anomaly_detection(X_emotions, X_embeddings, epochs=200, batch_size=8, patience=3):
274
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
275
-
276
- # Normalize emotions
277
- scaler_emotions = MinMaxScaler()
278
- X_emotions_scaled = scaler_emotions.fit_transform(X_emotions)
279
-
280
- # Process emotions
281
- X_emotions_scaled = torch.FloatTensor(X_emotions_scaled).to(device)
282
- if X_emotions_scaled.dim() == 2:
283
- X_emotions_scaled = X_emotions_scaled.unsqueeze(0)
284
-
285
- model_emotions = Autoencoder(input_size=X_emotions_scaled.shape[2]).to(device)
286
- criterion = nn.MSELoss()
287
- optimizer_emotions = optim.Adam(model_emotions.parameters())
288
-
289
- # Train emotions model
290
- for epoch in range(epochs):
291
- model_emotions.train()
292
- optimizer_emotions.zero_grad()
293
- output_emotions = model_emotions(X_emotions_scaled)
294
- loss_emotions = criterion(output_emotions, X_emotions_scaled)
295
- loss_emotions.backward()
296
- optimizer_emotions.step()
297
 
298
  # Process facial embeddings
299
  X_embeddings = torch.FloatTensor(X_embeddings).to(device)
300
  if X_embeddings.dim() == 2:
301
  X_embeddings = X_embeddings.unsqueeze(0)
302
 
303
  model_embeddings = Autoencoder(input_size=X_embeddings.shape[2]).to(device)
 
 
 
304
  optimizer_embeddings = optim.Adam(model_embeddings.parameters())
 
305
 
306
- # Train embeddings model
307
  for epoch in range(epochs):
308
- model_embeddings.train()
309
- optimizer_embeddings.zero_grad()
310
- output_embeddings = model_embeddings(X_embeddings)
311
- loss_embeddings = criterion(output_embeddings, X_embeddings)
312
- loss_embeddings.backward()
313
- optimizer_embeddings.step()
314
-
315
- # Compute MSE for emotions and embeddings
316
- model_emotions.eval()
 
317
  model_embeddings.eval()
 
318
  with torch.no_grad():
319
- reconstructed_emotions = model_emotions(X_emotions_scaled).cpu().numpy()
320
  reconstructed_embeddings = model_embeddings(X_embeddings).cpu().numpy()
 
321
 
322
- mse_emotions = np.mean(np.power(X_emotions_scaled.cpu().numpy() - reconstructed_emotions, 2), axis=2).squeeze()
323
  mse_embeddings = np.mean(np.power(X_embeddings.cpu().numpy() - reconstructed_embeddings, 2), axis=2).squeeze()
 
324
 
325
- return mse_emotions, mse_embeddings
326
 
327
- def plot_mse(df, mse_values, title, color='blue', time_threshold=3, anomaly_threshold=4):
328
- plt.figure(figsize=(16, 8), dpi=500)
329
  fig, ax = plt.subplots(figsize=(16, 8))
330
 
331
  if 'Seconds' not in df.columns:
@@ -347,12 +389,11 @@ def plot_mse(df, mse_values, title, color='blue', time_threshold=3, anomaly_thre
347
  median = np.median(mse_values)
348
 
349
  ax.scatter(df['Seconds'], mse_values, color=color, alpha=0.3, s=5)
350
- ax.plot(df['Seconds'], mean, color=color, linewidth=2)
351
- ax.fill_between(df['Seconds'], mean - std, mean + std, color=color, alpha=0.2)
352
 
353
  # Add median line
354
- ax.axhline(y=median, color='black', linestyle='--', label='Baseline')
355
- ax.text(ax.get_xlim()[1], median, 'Baseline', verticalalignment='center', horizontalalignment='left', color='black')
356
 
357
  # Add threshold line
358
  threshold = np.mean(mse_values) + anomaly_threshold * np.std(mse_values)
@@ -362,7 +403,7 @@ def plot_mse(df, mse_values, title, color='blue', time_threshold=3, anomaly_thre
362
  anomalies = determine_anomalies(mse_values, anomaly_threshold)
363
  anomaly_frames = df['Frame'].iloc[anomalies].tolist()
364
 
365
- ax.scatter(df['Seconds'].iloc[anomalies], mse_values[anomalies], color='red', s=25, zorder=5)
366
 
367
  anomaly_data = list(zip(df['Timecode'].iloc[anomalies],
368
  df['Seconds'].iloc[anomalies],
@@ -384,7 +425,7 @@ def plot_mse(df, mse_values, title, color='blue', time_threshold=3, anomaly_thre
384
  start_sec = group[0][1]
385
  end_sec = group[-1][1]
386
  rect = Rectangle((start_sec, ax.get_ylim()[0]), end_sec - start_sec, ax.get_ylim()[1] - ax.get_ylim()[0],
387
- facecolor='red', alpha=0.3, zorder=1)
388
  ax.add_patch(rect)
389
 
390
  for group in grouped_anomalies:
@@ -412,8 +453,8 @@ def plot_mse(df, mse_values, title, color='blue', time_threshold=3, anomaly_thre
412
  return fig, anomaly_frames
413
 
414
  def plot_mse_histogram(mse_values, title, anomaly_threshold, color='blue'):
415
- plt.figure(figsize=(16, 8), dpi=500)
416
- fig, ax = plt.subplots(figsize=(16, 8))
417
 
418
  ax.hist(mse_values, bins=100, edgecolor='black', color=color, alpha=0.7)
419
  ax.set_xlabel('Mean Squared Error')
@@ -440,56 +481,152 @@ def plot_mse_histogram(mse_values, title, anomaly_threshold, color='blue'):
440
  return fig
441
 
442
 
443
- def plot_emotion(df, emotion, color, anomaly_threshold):
444
- plt.figure(figsize=(16, 8), dpi=500)
445
  fig, ax = plt.subplots(figsize=(16, 8))
446
 
447
  df['Seconds'] = df['Timecode'].apply(
448
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
449
 
450
- mean = df[emotion].rolling(window=10).mean()
451
- std = df[emotion].rolling(window=10).std()
452
- median = df[emotion].median()
453
-
454
- ax.scatter(df['Seconds'], df[emotion], color=color, alpha=0.3, s=5)
455
- ax.plot(df['Seconds'], mean, color=color, linewidth=2)
456
- ax.fill_between(df['Seconds'], mean - std, mean + std, color=color, alpha=0.2)
457
 
458
- # Add median line
459
- ax.axhline(y=median, color='black', linestyle='--', label='Baseline')
460
- ax.text(ax.get_xlim()[1], median, 'Baseline', verticalalignment='center', horizontalalignment='left', color='black')
461
 
462
- # Convert anomaly threshold to probability
463
- probability_threshold = (anomaly_threshold - 1) / 6 # Convert 1-7 scale to 0-1 probability
 
464
 
465
- # Add threshold line and detect anomalies
466
- ax.axhline(y=probability_threshold, color='red', linestyle='--', label=f'Threshold: {probability_threshold:.2f}')
467
- ax.text(ax.get_xlim()[1], probability_threshold, f'Threshold: {probability_threshold:.2f}',
468
- verticalalignment='center', horizontalalignment='left', color='red')
469
 
470
- # Detect and highlight anomalies
471
- anomalies = df[emotion] >= probability_threshold
472
- ax.scatter(df['Seconds'][anomalies], df[emotion][anomalies], color='red', s=25, zorder=5)
473
 
474
  max_seconds = df['Seconds'].max()
475
- num_ticks = 100
476
  tick_locations = np.linspace(0, max_seconds, num_ticks)
477
  tick_labels = [seconds_to_timecode(int(s)) for s in tick_locations]
478
 
479
  ax.set_xticks(tick_locations)
480
  ax.set_xticklabels(tick_labels, rotation=90, ha='center', fontsize=6)
481
 
482
- ax.set_xlabel('Timecode')
483
- ax.set_ylabel('Emotion Probability')
484
- ax.set_title(f"{emotion.capitalize()} Over Time")
485
-
486
- ax.grid(True, linestyle='--', alpha=0.7)
487
- ax.legend()
488
  plt.tight_layout()
489
  plt.close()
490
  return fig
491
 
492
- def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=500):
 
493
  face_samples = {"most_frequent": [], "others": []}
494
  for cluster_folder in sorted(os.listdir(organized_faces_folder)):
495
  if cluster_folder.startswith("person_"):
@@ -523,12 +660,18 @@ def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster,
523
  break
524
  return face_samples
525
 
 
526
  def process_video(video_path, anomaly_threshold, desired_fps, progress=gr.Progress()):
527
  start_time = time.time()
528
  output_folder = "output"
529
  os.makedirs(output_folder, exist_ok=True)
530
  batch_size = 16
531
 
 
 
 
 
 
532
  with tempfile.TemporaryDirectory() as temp_dir:
533
  aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
534
  organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
@@ -549,12 +692,13 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=gr.Progre
549
 
550
  progress(1, "Frame extraction complete")
551
  progress(0.3, "Processing frames")
552
- embeddings_by_frame, emotions_by_frame, aligned_face_paths = process_frames(frames_folder, aligned_faces_folder,
553
- frame_count,
554
- progress, batch_size)
 
555
 
556
  if not aligned_face_paths:
557
- return ("No faces were extracted from the video.",) + (None,) * 10
558
 
559
  progress(0.6, "Clustering faces")
560
  embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
@@ -565,7 +709,7 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=gr.Progre
565
  organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
566
 
567
  progress(0.8, "Saving person data")
568
- df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
569
  original_fps, temp_dir, video_duration)
570
 
571
  # Add 'Seconds' column to df
@@ -576,107 +720,220 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=gr.Progre
576
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
577
 
578
  progress(0.9, "Performing anomaly detection")
579
- emotion_columns = ['angry', 'disgust', 'fear', 'sad', 'happy']
580
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
581
 
582
- X_emotions = df[emotion_columns].values
583
  X_embeddings = df[embedding_columns].values
584
 
585
  try:
586
- mse_emotions, mse_embeddings = anomaly_detection(X_emotions, X_embeddings, batch_size=batch_size)
 
587
 
588
  progress(0.95, "Generating plots")
589
- mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Embeddings",
590
- color='green',
591
  anomaly_threshold=anomaly_threshold)
592
- mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Embeddings",
593
- anomaly_threshold, color='green')
594
 
595
- # Add emotion plots
596
- emotion_plots = []
597
- for emotion, color in zip(emotion_columns, ['purple', 'brown', 'green', 'orange', 'darkblue']):
598
- emotion_plot = plot_emotion(df, emotion, color, anomaly_threshold)
599
- emotion_plots.append(emotion_plot)
600
 
601
- mse_var_emotions = np.var(mse_emotions)
602
- mse_var_embeddings = np.var(mse_embeddings)
 
 
 
 
 
 
 
603
 
604
  except Exception as e:
605
  print(f"Error details: {str(e)}")
606
- return (f"Error in anomaly detection: {str(e)}",) + (None,) * 15
 
 
607
 
608
  progress(1.0, "Preparing results")
609
- results = f"Number of persons/clusters detected: {num_clusters}\n\n"
610
- results += f"Breakdown of persons/clusters:\n"
611
  for cluster_id in range(num_clusters):
612
- results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
 
613
 
614
  end_time = time.time()
615
  execution_time = end_time - start_time
616
 
617
- # Load anomaly frames as images
618
- anomaly_faces_embeddings = [
619
- cv2.imread(os.path.join(aligned_faces_folder, f"frame_{frame}_face.jpg"))
620
- for frame in anomaly_frames_embeddings
621
- if os.path.exists(os.path.join(aligned_faces_folder, f"frame_{frame}_face.jpg"))
622
- ]
623
- anomaly_faces_embeddings = [cv2.cvtColor(face, cv2.COLOR_BGR2RGB) for face in anomaly_faces_embeddings if face is not None]
 
624
 
625
  return (
626
  execution_time,
627
  results,
628
  df,
629
  mse_embeddings,
630
- mse_emotions,
631
  mse_plot_embeddings,
632
  mse_histogram_embeddings,
633
- *emotion_plots,
 
 
 
634
  face_samples["most_frequent"],
635
  face_samples["others"],
636
  anomaly_faces_embeddings,
637
- aligned_faces_folder
 
 
638
  )
639
 
 
640
  with gr.Blocks() as iface:
641
- gr.Markdown("# Facial Expressions Anomaly Detection")
 
642
 
643
  with gr.Row():
644
  video_input = gr.Video()
645
- anomaly_threshold = gr.Slider(minimum=1, maximum=7, step=0.1, value=4.5, label="Anomaly Detection Threshold")
646
- fps_slider = gr.Slider(minimum=10, maximum=20, step=5, value=20, label="Frames Per Second")
647
 
 
648
  process_btn = gr.Button("Process Video")
649
-
650
  execution_time = gr.Number(label="Execution Time (seconds)")
651
- results_text = gr.Textbox(label="Anomaly Detection Results")
652
 
653
- anomaly_frames_embeddings = gr.Gallery(label="Anomaly Frames (Facial Embeddings)", columns=6, rows=2, height="auto")
 
654
 
655
- mse_embeddings_plot = gr.Plot(label="MSE: Facial Embeddings")
656
- mse_embeddings_hist = gr.Plot(label="MSE Distribution: Facial Embeddings")
 
 
 
657
 
658
- # Add emotion plots
659
- emotion_plots = [gr.Plot(label=f"{emotion.capitalize()} Over Time") for emotion in ['angry', 'disgust', 'fear', 'sad', 'happy']]
 
 
 
660
 
661
- face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples (Target)", columns=6, rows=2, height="auto")
662
- face_samples_others = gr.Gallery(label="Other Persons Samples", columns=6, rows=1, height="auto")
 
663
 
664
  # Hidden components to store intermediate results
665
  df_store = gr.State()
666
- mse_emotions_store = gr.State()
667
- mse_embeddings_store = gr.State()
668
  aligned_faces_folder_store = gr.State()
 
669
 
670
  process_btn.click(
671
- process_video,
672
- inputs=[video_input, anomaly_threshold, fps_slider],
673
  outputs=[
674
- execution_time, results_text, df_store, mse_embeddings_store, mse_emotions_store,
675
- mse_embeddings_plot, mse_embeddings_hist,
676
- *emotion_plots,
677
- face_samples_most_frequent, face_samples_others, anomaly_frames_embeddings,
678
- aligned_faces_folder_store
 
 
 
 
679
  ]
 
 
 
 
680
  )
681
 
682
  if __name__ == "__main__":
 
8
  from facenet_pytorch import InceptionResnetV1, MTCNN
9
  import tensorflow as tf
10
  import mediapipe as mp
 
11
  from sklearn.cluster import DBSCAN
12
  from sklearn.preprocessing import StandardScaler, MinMaxScaler
13
  import pandas as pd
14
  import matplotlib
15
  import matplotlib.pyplot as plt
16
+ import seaborn as sns
17
  from matplotlib.patches import Rectangle
18
  from moviepy.editor import VideoFileClip
19
+ from PIL import Image, ImageDraw, ImageFont
20
  import gradio as gr
21
  import tempfile
22
  import shutil
 
23
  import time
24
 
25
+
26
+ matplotlib.rcParams['figure.dpi'] = 400
27
+ matplotlib.rcParams['savefig.dpi'] = 400
28
 
29
  # Initialize models and other global variables
30
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31
+
32
+ FIXED_FPS = 20
33
 
34
+ mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.95, 0.95, 0.95], min_face_size=100)
35
  model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
36
+
37
  mp_face_mesh = mp.solutions.face_mesh
38
+ face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
39
+
40
+ mp_pose = mp.solutions.pose
41
+ mp_drawing = mp.solutions.drawing_utils
42
+ pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.8, min_tracking_confidence=0.8)
43
 
44
  def frame_to_timecode(frame_num, total_frames, duration):
45
  total_seconds = (frame_num / total_frames) * duration
 
59
  h, m, s = map(int, timecode.split(':'))
60
  return h * 3600 + m * 60 + s
61
 
62
+ def get_face_embedding(face_img):
63
  face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
64
  face_tensor = (face_tensor - 0.5) / 0.5
65
  face_tensor = face_tensor.to(device)
66
  with torch.no_grad():
67
  embedding = model(face_tensor)
68
+ return embedding.cpu().numpy().flatten()
 
69
 
70
  def alignFace(img):
71
  img_raw = img.copy()
 
92
  new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
93
  return new_img
94
 
95
+ def calculate_posture_score(frame):
96
+ image_height, image_width, _ = frame.shape
97
+ results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
98
+
99
+ if not results.pose_landmarks:
100
+ return None, None
101
+
102
+ landmarks = results.pose_landmarks.landmark
103
+
104
+ # Use only body landmarks
105
+ left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
106
+ right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
107
+ left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value]
108
+ right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value]
109
+ left_knee = landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value]
110
+ right_knee = landmarks[mp_pose.PoseLandmark.RIGHT_KNEE.value]
111
+
112
+ # Calculate angles
113
+ shoulder_angle = abs(math.degrees(math.atan2(right_shoulder.y - left_shoulder.y, right_shoulder.x - left_shoulder.x)))
114
+ hip_angle = abs(math.degrees(math.atan2(right_hip.y - left_hip.y, right_hip.x - left_hip.x)))
115
+ knee_angle = abs(math.degrees(math.atan2(right_knee.y - left_knee.y, right_knee.x - left_knee.x)))
116
+
117
+ # Calculate vertical alignment
118
+ shoulder_hip_alignment = abs((left_shoulder.y + right_shoulder.y) / 2 - (left_hip.y + right_hip.y) / 2)
119
+ hip_knee_alignment = abs((left_hip.y + right_hip.y) / 2 - (left_knee.y + right_knee.y) / 2)
120
+ # Add head landmarks
121
+ nose = landmarks[mp_pose.PoseLandmark.NOSE.value]
122
+ left_ear = landmarks[mp_pose.PoseLandmark.LEFT_EAR.value]
123
+ right_ear = landmarks[mp_pose.PoseLandmark.RIGHT_EAR.value]
124
+ # Calculate head tilt
125
+ head_tilt = abs(math.degrees(math.atan2(right_ear.y - left_ear.y, right_ear.x - left_ear.x)))
126
+ # Calculate head position relative to shoulders
127
+ head_position = abs((nose.y - (left_shoulder.y + right_shoulder.y) / 2) /
128
+ ((left_shoulder.y + right_shoulder.y) / 2 - (left_hip.y + right_hip.y) / 2))
129
+
130
+ # Combine metrics into a single posture score (you may need to adjust these weights)
131
+ posture_score = (
132
+ (1 - abs(shoulder_angle - hip_angle) / 90) * 0.3 +
133
+ (1 - abs(hip_angle - knee_angle) / 90) * 0.2 +
134
+ (1 - shoulder_hip_alignment) * 0.1 +
135
+ (1 - hip_knee_alignment) * 0.1 +
136
+ (1 - abs(head_tilt - 90) / 90) * 0.15 +
137
+ (1 - head_position) * 0.15
138
+ )
139
+
140
+ return posture_score, results.pose_landmarks
141
+
142
  def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
143
  os.makedirs(output_folder, exist_ok=True)
144
  clip = VideoFileClip(video_path)
 
176
  angle_degrees = math.degrees(angle)
177
  return abs(180 - angle_degrees) < threshold
178
 
179
+
180
  def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
181
  embeddings_by_frame = {}
182
  emotions_by_frame = {}
183
+ posture_scores_by_frame = {}
184
+ posture_landmarks_by_frame = {}
185
  aligned_face_paths = []
186
  frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
187
 
 
203
 
204
  for j, (frame, frame_num, boxes, probs) in enumerate(
205
  zip(batch_frames, batch_nums, batch_boxes, batch_probs)):
206
+
207
+ # Calculate posture score for the full frame
208
+ posture_score, posture_landmarks = calculate_posture_score(frame)
209
+ posture_scores_by_frame[frame_num] = posture_score
210
+ posture_landmarks_by_frame[frame_num] = posture_landmarks
211
+
212
  if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
213
  x1, y1, x2, y2 = [int(b) for b in boxes[0]]
214
  face = frame[y1:y2, x1:x2]
 
221
  output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
222
  cv2.imwrite(output_path, aligned_face_resized)
223
  aligned_face_paths.append(output_path)
224
+ embedding = get_face_embedding(aligned_face_resized)
225
  embeddings_by_frame[frame_num] = embedding
 
226
 
227
  progress((i + len(batch_files)) / len(frame_files),
228
  f"Processing frames {i + 1} to {min(i + len(batch_files), len(frame_files))} of {len(frame_files)}")
229
 
230
+ return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths
231
+
232
 
233
  def cluster_faces(embeddings):
234
  if len(embeddings) < 2:
 
253
  dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
254
  shutil.copy(src, dst)
255
 
256
+ def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
 
257
  person_data = {}
258
 
259
+ for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
260
  if cluster not in person_data:
261
  person_data[cluster] = []
262
+ person_data[cluster].append((frame_num, embedding))
263
 
264
  largest_cluster = max(person_data, key=lambda k: len(person_data[k]))
265
 
266
  data = person_data[largest_cluster]
267
  data.sort(key=lambda x: x[0])
268
+ frames, embeddings = zip(*data)
269
 
270
  embeddings_array = np.array(embeddings)
271
  np.save(os.path.join(output_folder, 'face_embeddings.npy'), embeddings_array)
 
282
  for i in range(len(embeddings[0])):
283
  df_data[f'Raw_Embedding_{i}'] = [embedding[i] for embedding in embeddings]
284
 
 
 
 
285
  df = pd.DataFrame(df_data)
286
 
287
  return df, largest_cluster
 
321
  anomalies = mse_values > (mean + threshold * std)
322
  return anomalies
323
 
324
+ def anomaly_detection(X_embeddings, X_posture, epochs=200, batch_size=8, patience=3):
325
+ # Normalize posture
326
+ scaler_posture = MinMaxScaler()
327
+ X_posture_scaled = scaler_posture.fit_transform(X_posture.reshape(-1, 1))
 
328
 
329
  # Process facial embeddings
330
  X_embeddings = torch.FloatTensor(X_embeddings).to(device)
331
  if X_embeddings.dim() == 2:
332
  X_embeddings = X_embeddings.unsqueeze(0)
333
 
334
+ # Process posture
335
+ X_posture_scaled = torch.FloatTensor(X_posture_scaled).to(device)
336
+ if X_posture_scaled.dim() == 2:
337
+ X_posture_scaled = X_posture_scaled.unsqueeze(0)
338
+
339
  model_embeddings = Autoencoder(input_size=X_embeddings.shape[2]).to(device)
340
+ model_posture = Autoencoder(input_size=X_posture_scaled.shape[2]).to(device)
341
+
342
+ criterion = nn.MSELoss()
343
  optimizer_embeddings = optim.Adam(model_embeddings.parameters())
344
+ optimizer_posture = optim.Adam(model_posture.parameters())
345
 
346
+ # Train models
347
  for epoch in range(epochs):
348
+ for model, optimizer, X in [(model_embeddings, optimizer_embeddings, X_embeddings),
349
+ (model_posture, optimizer_posture, X_posture_scaled)]:
350
+ model.train()
351
+ optimizer.zero_grad()
352
+ output = model(X)
353
+ loss = criterion(output, X)
354
+ loss.backward()
355
+ optimizer.step()
356
+
357
+ # Compute MSE for embeddings and posture
358
  model_embeddings.eval()
359
+ model_posture.eval()
360
  with torch.no_grad():
 
361
  reconstructed_embeddings = model_embeddings(X_embeddings).cpu().numpy()
362
+ reconstructed_posture = model_posture(X_posture_scaled).cpu().numpy()
363
 
 
364
  mse_embeddings = np.mean(np.power(X_embeddings.cpu().numpy() - reconstructed_embeddings, 2), axis=2).squeeze()
365
+ mse_posture = np.mean(np.power(X_posture_scaled.cpu().numpy() - reconstructed_posture, 2), axis=2).squeeze()
366
 
367
+ return mse_embeddings, mse_posture
368
 
369
+ def plot_mse(df, mse_values, title, color='navy', time_threshold=3, anomaly_threshold=4):
370
+ plt.figure(figsize=(16, 8), dpi=400)
371
  fig, ax = plt.subplots(figsize=(16, 8))
372
 
373
  if 'Seconds' not in df.columns:
 
389
  median = np.median(mse_values)
390
 
391
  ax.scatter(df['Seconds'], mse_values, color=color, alpha=0.3, s=5)
392
+ ax.plot(df['Seconds'], mean, color=color, linewidth=0.5)
393
+ ax.fill_between(df['Seconds'], mean - std, mean + std, color=color, alpha=0.1)
394
 
395
  # Add median line
396
+ ax.axhline(y=median, color='black', linestyle='--', label='Median Baseline')
 
397
 
398
  # Add threshold line
399
  threshold = np.mean(mse_values) + anomaly_threshold * np.std(mse_values)
 
403
  anomalies = determine_anomalies(mse_values, anomaly_threshold)
404
  anomaly_frames = df['Frame'].iloc[anomalies].tolist()
405
 
406
+ ax.scatter(df['Seconds'].iloc[anomalies], mse_values[anomalies], color='red', s=20, zorder=5)
407
 
408
  anomaly_data = list(zip(df['Timecode'].iloc[anomalies],
409
  df['Seconds'].iloc[anomalies],
 
425
  start_sec = group[0][1]
426
  end_sec = group[-1][1]
427
  rect = Rectangle((start_sec, ax.get_ylim()[0]), end_sec - start_sec, ax.get_ylim()[1] - ax.get_ylim()[0],
428
+ facecolor='red', alpha=0.2, zorder=1)
429
  ax.add_patch(rect)
430
 
431
  for group in grouped_anomalies:
 
453
  return fig, anomaly_frames
454
 
455
  def plot_mse_histogram(mse_values, title, anomaly_threshold, color='blue'):
456
+ plt.figure(figsize=(16, 4), dpi=400)
457
+ fig, ax = plt.subplots(figsize=(16, 4))
458
 
459
  ax.hist(mse_values, bins=100, edgecolor='black', color=color, alpha=0.7)
460
  ax.set_xlabel('Mean Squared Error')
 
481
  return fig
482
 
483
 
484
+ def plot_posture(df, posture_scores, color='blue', anomaly_threshold=4):
485
+ plt.figure(figsize=(16, 8), dpi=400)
486
  fig, ax = plt.subplots(figsize=(16, 8))
487
 
488
  df['Seconds'] = df['Timecode'].apply(
489
  lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
490
 
491
+ posture_data = [(frame, score) for frame, score in posture_scores.items() if score is not None]
492
+ posture_frames, posture_scores = zip(*posture_data)
 
493
 
494
+ # Create a new dataframe for posture data
495
+ posture_df = pd.DataFrame({'Frame': posture_frames, 'Score': posture_scores})
496
+ posture_df = posture_df.merge(df[['Frame', 'Seconds']], on='Frame', how='inner')
497
 
498
+ ax.scatter(posture_df['Seconds'], posture_df['Score'], color=color, alpha=0.3, s=5)
499
+ mean = posture_df['Score'].rolling(window=10).mean()
500
+ ax.plot(posture_df['Seconds'], mean, color=color, linewidth=0.5)
501
 
502
+ ax.set_xlabel('Timecode')
503
+ ax.set_ylabel('Posture Score')
504
+ ax.set_title("Body Posture Over Time")
 
505
 
506
+ ax.grid(True, linestyle='--', alpha=0.7)
 
 
507
 
508
  max_seconds = df['Seconds'].max()
509
+ num_ticks = 80
510
  tick_locations = np.linspace(0, max_seconds, num_ticks)
511
  tick_labels = [seconds_to_timecode(int(s)) for s in tick_locations]
512
 
513
  ax.set_xticks(tick_locations)
514
  ax.set_xticklabels(tick_labels, rotation=90, ha='center', fontsize=6)
515
 
516
  plt.tight_layout()
517
  plt.close()
518
  return fig
519
 
520
+
521
+ def plot_mse_heatmap(mse_values, title, df):
522
+ plt.figure(figsize=(20, 10), dpi=400)
523
+ fig, ax = plt.subplots(figsize=(20, 10))
524
+
525
+ # Reshape MSE values to 2D array for heatmap
526
+ mse_2d = mse_values.reshape(1, -1)
527
+
528
+ # Create heatmap
529
+ sns.heatmap(mse_2d, cmap='YlOrRd', cbar_kws={'label': 'MSE'}, ax=ax)
530
+
531
+ # Set x-axis ticks to timecodes
532
+ num_ticks = 60
533
+ tick_locations = np.linspace(0, len(mse_values) - 1, num_ticks).astype(int)
534
+ tick_labels = [df['Timecode'].iloc[i] for i in tick_locations]
535
+
536
+ ax.set_xticks(tick_locations)
537
+ ax.set_xticklabels(tick_labels, rotation=90, ha='center', va='top') # Adjusted rotation and alignment
538
+
539
+ ax.set_title(title)
540
+
541
+ # Remove y-axis labels
542
+ ax.set_yticks([])
543
+
544
+ plt.tight_layout() # Ensure all elements fit within the figure
545
+ plt.close()
546
+ return fig
547
+
548
+ def draw_pose_landmarks(frame, landmarks):
549
+ annotated_frame = frame.copy()
550
+ # Include relevant landmarks for head position and body
551
+ body_landmarks = [
552
+ mp_pose.PoseLandmark.NOSE,
553
+ mp_pose.PoseLandmark.LEFT_SHOULDER,
554
+ mp_pose.PoseLandmark.RIGHT_SHOULDER,
555
+ mp_pose.PoseLandmark.LEFT_EAR,
556
+ mp_pose.PoseLandmark.RIGHT_EAR,
557
+ mp_pose.PoseLandmark.LEFT_ELBOW,
558
+ mp_pose.PoseLandmark.RIGHT_ELBOW,
559
+ mp_pose.PoseLandmark.LEFT_WRIST,
560
+ mp_pose.PoseLandmark.RIGHT_WRIST,
561
+ mp_pose.PoseLandmark.LEFT_HIP,
562
+ mp_pose.PoseLandmark.RIGHT_HIP,
563
+ mp_pose.PoseLandmark.LEFT_KNEE,
564
+ mp_pose.PoseLandmark.RIGHT_KNEE,
565
+ mp_pose.PoseLandmark.LEFT_ANKLE,
566
+ mp_pose.PoseLandmark.RIGHT_ANKLE
567
+ ]
568
+
569
+ # Connections for head position and body
570
+ body_connections = [
571
+ (mp_pose.PoseLandmark.LEFT_EAR, mp_pose.PoseLandmark.LEFT_SHOULDER),
572
+ (mp_pose.PoseLandmark.RIGHT_EAR, mp_pose.PoseLandmark.RIGHT_SHOULDER),
573
+ (mp_pose.PoseLandmark.NOSE, mp_pose.PoseLandmark.LEFT_SHOULDER),
574
+ (mp_pose.PoseLandmark.NOSE, mp_pose.PoseLandmark.RIGHT_SHOULDER),
575
+ (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.RIGHT_SHOULDER),
576
+ (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.LEFT_ELBOW),
577
+ (mp_pose.PoseLandmark.RIGHT_SHOULDER, mp_pose.PoseLandmark.RIGHT_ELBOW),
578
+ (mp_pose.PoseLandmark.LEFT_ELBOW, mp_pose.PoseLandmark.LEFT_WRIST),
579
+ (mp_pose.PoseLandmark.RIGHT_ELBOW, mp_pose.PoseLandmark.RIGHT_WRIST),
580
+ (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.LEFT_HIP),
581
+ (mp_pose.PoseLandmark.RIGHT_SHOULDER, mp_pose.PoseLandmark.RIGHT_HIP),
582
+ (mp_pose.PoseLandmark.LEFT_HIP, mp_pose.PoseLandmark.RIGHT_HIP),
583
+ (mp_pose.PoseLandmark.LEFT_HIP, mp_pose.PoseLandmark.LEFT_KNEE),
584
+ (mp_pose.PoseLandmark.RIGHT_HIP, mp_pose.PoseLandmark.RIGHT_KNEE),
585
+ (mp_pose.PoseLandmark.LEFT_KNEE, mp_pose.PoseLandmark.LEFT_ANKLE),
586
+ (mp_pose.PoseLandmark.RIGHT_KNEE, mp_pose.PoseLandmark.RIGHT_ANKLE)
587
+ ]
588
+
589
+ # Draw landmarks
590
+ for landmark in body_landmarks:
591
+ if landmark in landmarks.landmark:
592
+ lm = landmarks.landmark[landmark]
593
+ h, w, _ = annotated_frame.shape
594
+ cx, cy = int(lm.x * w), int(lm.y * h)
595
+ cv2.circle(annotated_frame, (cx, cy), 5, (245, 117, 66), -1)
596
+
597
+ # Draw connections
598
+ for connection in body_connections:
599
+ start_lm = landmarks.landmark[connection[0]]
600
+ end_lm = landmarks.landmark[connection[1]]
601
+ h, w, _ = annotated_frame.shape
602
+ start_point = (int(start_lm.x * w), int(start_lm.y * h))
603
+ end_point = (int(end_lm.x * w), int(end_lm.y * h))
604
+ cv2.line(annotated_frame, start_point, end_point, (245, 66, 230), 2)
605
+
606
+ # Highlight head tilt
607
+ left_ear = landmarks.landmark[mp_pose.PoseLandmark.LEFT_EAR]
608
+ right_ear = landmarks.landmark[mp_pose.PoseLandmark.RIGHT_EAR]
609
+ nose = landmarks.landmark[mp_pose.PoseLandmark.NOSE]
610
+
611
+ h, w, _ = annotated_frame.shape
612
+ left_ear_point = (int(left_ear.x * w), int(left_ear.y * h))
613
+ right_ear_point = (int(right_ear.x * w), int(right_ear.y * h))
614
+ nose_point = (int(nose.x * w), int(nose.y * h))
615
+
616
+ # Draw a line between ears to show head tilt
617
+ cv2.line(annotated_frame, left_ear_point, right_ear_point, (0, 255, 0), 2)
618
+
619
+ # Draw a line from nose to the midpoint between shoulders to show head forward/backward tilt
620
+ left_shoulder = landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER]
621
+ right_shoulder = landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER]
622
+ shoulder_mid_x = (left_shoulder.x + right_shoulder.x) / 2
623
+ shoulder_mid_y = (left_shoulder.y + right_shoulder.y) / 2
624
+ shoulder_mid_point = (int(shoulder_mid_x * w), int(shoulder_mid_y * h))
625
+ cv2.line(annotated_frame, nose_point, shoulder_mid_point, (0, 255, 0), 2)
626
+
627
+ return annotated_frame
628
+
629
+ def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=400):
630
  face_samples = {"most_frequent": [], "others": []}
631
  for cluster_folder in sorted(os.listdir(organized_faces_folder)):
632
  if cluster_folder.startswith("person_"):
 
660
  break
661
  return face_samples
662
 
663
+
664
  def process_video(video_path, anomaly_threshold, desired_fps, progress=gr.Progress()):
665
  start_time = time.time()
666
  output_folder = "output"
667
  os.makedirs(output_folder, exist_ok=True)
668
  batch_size = 16
669
 
670
+ GRAPH_COLORS = {
671
+ 'facial_embeddings': 'navy',
672
+ 'body_posture': 'purple'
673
+ }
674
+
675
  with tempfile.TemporaryDirectory() as temp_dir:
676
  aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
677
  organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
 
692
 
693
  progress(1, "Frame extraction complete")
694
  progress(0.3, "Processing frames")
695
+ embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths = process_frames(
696
+ frames_folder, aligned_faces_folder,
697
+ frame_count,
698
+ progress, batch_size)
699
 
700
  if not aligned_face_paths:
701
+ raise ValueError("No faces were extracted from the video.")
702
 
703
  progress(0.6, "Clustering faces")
704
  embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
 
709
  organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
710
 
711
  progress(0.8, "Saving person data")
712
+ df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps,
713
  original_fps, temp_dir, video_duration)
714
 
715
  # Add 'Seconds' column to df
 
720
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
721
 
722
  progress(0.9, "Performing anomaly detection")
 
723
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
724
 
 
725
  X_embeddings = df[embedding_columns].values
726
 
727
  try:
728
+ X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
729
+ X_posture = X_posture[X_posture != None].reshape(-1, 1) # Remove None values and reshape
730
+
731
+ # Ensure X_posture is not empty
732
+ if len(X_posture) == 0:
733
+ raise ValueError("No valid posture data found")
734
+
735
+ mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture, batch_size=batch_size)
736
 
737
  progress(0.95, "Generating plots")
738
+ mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
739
+ color=GRAPH_COLORS['facial_embeddings'],
740
  anomaly_threshold=anomaly_threshold)
 
 
741
 
742
+ mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
743
+ anomaly_threshold, color=GRAPH_COLORS['facial_embeddings'])
 
 
 
744
 
745
+ mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
746
+ color=GRAPH_COLORS['body_posture'],
747
+ anomaly_threshold=anomaly_threshold)
748
+
749
+ mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
750
+ anomaly_threshold, color=GRAPH_COLORS['body_posture'])
751
+
752
+ mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
753
+ mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
754
 
755
  except Exception as e:
756
  print(f"Error details: {str(e)}")
757
+ import traceback
758
+ traceback.print_exc()
759
+ return (f"Error in video processing: {str(e)}",) + (None,) * 14
760
 
761
  progress(1.0, "Preparing results")
762
+ results = f"Number of persons detected: {num_clusters}\n\n"
763
+ results += "Breakdown:\n"
764
  for cluster_id in range(num_clusters):
765
+ face_count = len([c for c in clusters if c == cluster_id])
766
+ results += f"Person {cluster_id + 1}: {face_count} face frames\n"
767
 
768
  end_time = time.time()
769
  execution_time = end_time - start_time
770
 
771
+ def add_timecode_to_image(image, timecode):
772
+ img_pil = Image.fromarray(image)
773
+ draw = ImageDraw.Draw(img_pil)
774
+ font = ImageFont.truetype("arial.ttf", 15)
775
+ draw.text((10, 10), timecode, (255, 0, 0), font=font)
776
+ return np.array(img_pil)
777
+
778
+ # In the process_video function, update the anomaly frame processing:
779
+ anomaly_faces_embeddings = []
780
+ for frame in anomaly_frames_embeddings:
781
+ face_path = os.path.join(aligned_faces_folder, f"frame_{frame}_face.jpg")
782
+ if os.path.exists(face_path):
783
+ face_img = cv2.imread(face_path)
784
+ if face_img is not None:
785
+ face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
786
+ timecode = df[df['Frame'] == frame]['Timecode'].iloc[0]
787
+ face_img_with_timecode = add_timecode_to_image(face_img, timecode)
788
+ anomaly_faces_embeddings.append(face_img_with_timecode)
789
+
790
+ anomaly_frames_posture_images = []
791
+ for frame in anomaly_frames_posture:
792
+ frame_path = os.path.join(frames_folder, f"frame_{frame:04d}.jpg")
793
+ if os.path.exists(frame_path):
794
+ frame_img = cv2.imread(frame_path)
795
+ if frame_img is not None:
796
+ frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
797
+ pose_results = pose.process(frame_img)
798
+ if pose_results.pose_landmarks:
799
+ frame_img = draw_pose_landmarks(frame_img, pose_results.pose_landmarks)
800
+ timecode = df[df['Frame'] == frame]['Timecode'].iloc[0]
801
+ frame_img_with_timecode = add_timecode_to_image(frame_img, timecode)
802
+ anomaly_frames_posture_images.append(frame_img_with_timecode)
803
 
804
  return (
805
  execution_time,
806
  results,
807
  df,
808
  mse_embeddings,
809
+ mse_posture,
810
  mse_plot_embeddings,
811
  mse_histogram_embeddings,
812
+ mse_plot_posture,
813
+ mse_histogram_posture,
814
+ mse_heatmap_embeddings,
815
+ mse_heatmap_posture,
816
  face_samples["most_frequent"],
817
  face_samples["others"],
818
  anomaly_faces_embeddings,
819
+ anomaly_frames_posture_images,
820
+ aligned_faces_folder,
821
+ frames_folder
822
  )
823
 
824
+
825
  with gr.Blocks() as iface:
826
+ gr.Markdown("""
827
+ # Facial Expression and Body Language Anomaly Detection
828
+
829
+ This application analyzes videos to detect anomalies in facial features and body language.
830
+ It processes the video frames to extract facial embeddings and body posture,
831
+ then uses machine learning techniques to identify unusual patterns or deviations from the norm.
832
+
833
+ For more information, visit: [https://github.com/reab5555/Facial-Expression-Anomaly-Detection](https://github.com/reab5555/Facial-Expression-Anomaly-Detection)
834
+ """)
835
 
836
  with gr.Row():
837
  video_input = gr.Video()
 
 
838
 
839
+ anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3.5, label="Anomaly Detection Threshold")
840
  process_btn = gr.Button("Process Video")
841
+ progress_bar = gr.Progress()
842
  execution_time = gr.Number(label="Execution Time (seconds)")
 
843
 
844
+ with gr.Group(visible=False) as results_group:
845
+ results_text = gr.TextArea(label="Anomaly Detection Results", lines=6)
846
 
847
+ with gr.Tab("Facial Features"):
848
+ mse_features_plot = gr.Plot(label="MSE: Facial Features")
849
+ mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
850
+ mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
851
+ anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
852
 
853
+ with gr.Tab("Body Posture"):
854
+ mse_posture_plot = gr.Plot(label="MSE: Body Posture")
855
+ mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
856
+ mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
857
+ anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
858
 
859
+ with gr.Tab("Face Samples"):
860
+ face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples (Target)", columns=6, rows=2, height="auto")
861
+ face_samples_others = gr.Gallery(label="Other Persons Samples", columns=6, rows=1, height="auto")
862
 
863
  # Hidden components to store intermediate results
864
  df_store = gr.State()
865
+ mse_features_store = gr.State()
866
+ mse_posture_store = gr.State()
867
  aligned_faces_folder_store = gr.State()
868
+ frames_folder_store = gr.State()
869
+ mse_heatmap_embeddings_store = gr.State()
870
+ mse_heatmap_posture_store = gr.State()
871
+
872
+ def process_and_show_completion(video_input_path, anomaly_threshold_input):
873
+ try:
874
+ print("Starting video processing...")
875
+ results = process_video(video_input_path, anomaly_threshold_input, FIXED_FPS, progress=progress_bar)
876
+ print("Video processing completed.")
877
+
878
+ if isinstance(results[0], str) and results[0].startswith("Error"):
879
+ print(f"Error occurred: {results[0]}")
880
+ return [results[0]] + [None] * 18 # Update this line to match the number of outputs
881
+
882
+ exec_time, results_summary, df, mse_embeddings, mse_posture, \
883
+ mse_plot_embeddings, mse_histogram_embeddings, \
884
+ mse_plot_posture, mse_histogram_posture, \
885
+ mse_heatmap_embeddings, mse_heatmap_posture, \
886
+ face_samples_frequent, face_samples_other, \
887
+ anomaly_faces_embeddings, anomaly_frames_posture_images, \
888
+ aligned_faces_folder, frames_folder = results
889
+
890
+ # Convert numpy arrays to PIL Images for the galleries
891
+ anomaly_faces_embeddings_pil = [Image.fromarray(face) for face in anomaly_faces_embeddings]
892
+ anomaly_frames_posture_pil = [Image.fromarray(frame) for frame in anomaly_frames_posture_images]
893
+
894
+ # Ensure face samples are in the correct format for Gradio
895
+ face_samples_frequent = [Image.open(path) for path in face_samples_frequent]
896
+ face_samples_other = [Image.open(path) for path in face_samples_other]
897
+
898
+ output = [
899
+ exec_time, results_summary,
900
+ df, mse_embeddings, mse_posture,
901
+ mse_plot_embeddings, mse_plot_posture,
902
+ mse_histogram_embeddings, mse_histogram_posture,
903
+ mse_heatmap_embeddings, mse_heatmap_posture,
904
+ anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
905
+ face_samples_frequent, face_samples_other,
906
+ aligned_faces_folder, frames_folder,
907
+ mse_embeddings, mse_posture
908
+ ]
909
+
910
+ return output
911
+
912
+ except Exception as e:
913
+ error_message = f"An error occurred: {str(e)}"
914
+ print(error_message)
915
+ import traceback
916
+ traceback.print_exc()
917
+ return [error_message] + [None] * 18
918
 
919
  process_btn.click(
920
+ process_and_show_completion,
921
+ inputs=[video_input, anomaly_threshold],
922
  outputs=[
923
+ execution_time, results_text, df_store,
924
+ mse_features_store, mse_posture_store,
925
+ mse_features_plot, mse_posture_plot,
926
+ mse_features_hist, mse_posture_hist,
927
+ mse_features_heatmap, mse_posture_heatmap,
928
+ anomaly_frames_features, anomaly_frames_posture,
929
+ face_samples_most_frequent, face_samples_others,
930
+ aligned_faces_folder_store, frames_folder_store,
931
+ mse_heatmap_embeddings_store, mse_heatmap_posture_store
932
  ]
933
+ ).then(
934
+ lambda: gr.Group(visible=True),
935
+ inputs=None,
936
+ outputs=[results_group]
937
  )
938
 
939
  if __name__ == "__main__":