reab5555 committed on
Commit ce8fdc1 · verified · 1 Parent(s): 5286b18

Delete app.py

Files changed (1)
  1. app.py +0 -943
app.py DELETED
@@ -1,943 +0,0 @@
1
- import math
2
- import os
3
- import cv2
4
- import numpy as np
5
- import torch
6
- import torch.nn as nn
7
- import torch.optim as optim
8
- from facenet_pytorch import InceptionResnetV1, MTCNN
9
- import tensorflow as tf
10
- import mediapipe as mp
11
- from sklearn.cluster import DBSCAN
12
- from sklearn.preprocessing import StandardScaler, MinMaxScaler
13
- import pandas as pd
14
- import matplotlib
15
- import matplotlib.pyplot as plt
16
- import seaborn as sns
17
- from matplotlib.patches import Rectangle
18
- from moviepy.editor import VideoFileClip
19
- from PIL import Image, ImageDraw, ImageFont
20
- import gradio as gr
21
- import tempfile
22
- import shutil
23
- import time
24
-
25
-
26
- matplotlib.rcParams['figure.dpi'] = 400
27
- matplotlib.rcParams['savefig.dpi'] = 400
28
-
29
- # Initialize models and other global variables
30
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31
-
32
- FIXED_FPS = 5
33
-
34
- mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.95, 0.95, 0.95], min_face_size=80)
35
- model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
36
-
37
- mp_face_mesh = mp.solutions.face_mesh
38
- face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
39
-
40
- mp_pose = mp.solutions.pose
41
- mp_drawing = mp.solutions.drawing_utils
42
- pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.8, min_tracking_confidence=0.8)
43
-
44
- def frame_to_timecode(frame_num, total_frames, duration):
45
- total_seconds = (frame_num / total_frames) * duration
46
- hours = int(total_seconds // 3600)
47
- minutes = int((total_seconds % 3600) // 60)
48
- seconds = int(total_seconds % 60)
49
- milliseconds = int((total_seconds - int(total_seconds)) * 1000)
50
- return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
51
-
52
- def seconds_to_timecode(seconds):
53
- hours = int(seconds // 3600)
54
- minutes = int((seconds % 3600) // 60)
55
- seconds = int(seconds % 60)
56
- return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
57
-
58
- def timecode_to_seconds(timecode):
59
- h, m, s = map(int, timecode.split(':'))
60
- return h * 3600 + m * 60 + s
61
-
62
- def get_face_embedding(face_img):
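- # Scale pixels to [0, 1], shift to [-1, 1], run the face through InceptionResnetV1 and return the flattened embedding vector.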
63
- face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
64
- face_tensor = (face_tensor - 0.5) / 0.5
65
- face_tensor = face_tensor.to(device)
66
- with torch.no_grad():
67
- embedding = model(face_tensor)
68
- return embedding.cpu().numpy().flatten()
69
-
70
- def alignFace(img):
71
- img_raw = img.copy()
72
- results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
73
- if not results.multi_face_landmarks:
74
- return None
75
- landmarks = results.multi_face_landmarks[0].landmark
76
- left_eye = np.array([[landmarks[33].x, landmarks[33].y], [landmarks[160].x, landmarks[160].y],
77
- [landmarks[158].x, landmarks[158].y], [landmarks[144].x, landmarks[144].y],
78
- [landmarks[153].x, landmarks[153].y], [landmarks[145].x, landmarks[145].y]])
79
- right_eye = np.array([[landmarks[362].x, landmarks[362].y], [landmarks[385].x, landmarks[385].y],
80
- [landmarks[387].x, landmarks[387].y], [landmarks[263].x, landmarks[263].y],
81
- [landmarks[373].x, landmarks[373].y], [landmarks[380].x, landmarks[380].y]])
82
- left_eye_center = left_eye.mean(axis=0).astype(np.int32)
83
- right_eye_center = right_eye.mean(axis=0).astype(np.int32)
84
- dY = right_eye_center[1] - left_eye_center[1]
85
- dX = right_eye_center[0] - left_eye_center[0]
86
- angle = np.degrees(np.arctan2(dY, dX))
87
- desired_angle = 0
88
- angle_diff = desired_angle - angle
89
- height, width = img_raw.shape[:2]
90
- center = (width // 2, height // 2)
91
- rotation_matrix = cv2.getRotationMatrix2D(center, angle_diff, 1)
92
- new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
93
- return new_img
94
-
95
- def calculate_posture_score(frame):
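- # Heuristic posture score: a hand-weighted combination of shoulder/hip/knee line angles, shoulder-hip and hip-knee vertical alignment, head tilt, and head position relative to the shoulders.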
96
- image_height, image_width, _ = frame.shape
97
- results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
98
-
99
- if not results.pose_landmarks:
100
- return None, None
101
-
102
- landmarks = results.pose_landmarks.landmark
103
-
104
- # Use only body landmarks
105
- left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
106
- right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
107
- left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value]
108
- right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value]
109
- left_knee = landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value]
110
- right_knee = landmarks[mp_pose.PoseLandmark.RIGHT_KNEE.value]
111
-
112
- # Calculate angles
113
- shoulder_angle = abs(math.degrees(math.atan2(right_shoulder.y - left_shoulder.y, right_shoulder.x - left_shoulder.x)))
114
- hip_angle = abs(math.degrees(math.atan2(right_hip.y - left_hip.y, right_hip.x - left_hip.x)))
115
- knee_angle = abs(math.degrees(math.atan2(right_knee.y - left_knee.y, right_knee.x - left_knee.x)))
116
-
117
- # Calculate vertical alignment
118
- shoulder_hip_alignment = abs((left_shoulder.y + right_shoulder.y) / 2 - (left_hip.y + right_hip.y) / 2)
119
- hip_knee_alignment = abs((left_hip.y + right_hip.y) / 2 - (left_knee.y + right_knee.y) / 2)
120
- # Add head landmarks
121
- nose = landmarks[mp_pose.PoseLandmark.NOSE.value]
122
- left_ear = landmarks[mp_pose.PoseLandmark.LEFT_EAR.value]
123
- right_ear = landmarks[mp_pose.PoseLandmark.RIGHT_EAR.value]
124
- # Calculate head tilt
125
- head_tilt = abs(math.degrees(math.atan2(right_ear.y - left_ear.y, right_ear.x - left_ear.x)))
126
- # Calculate head position relative to shoulders
127
- head_position = abs((nose.y - (left_shoulder.y + right_shoulder.y) / 2) /
128
- ((left_shoulder.y + right_shoulder.y) / 2 - (left_hip.y + right_hip.y) / 2))
129
-
130
- # Combine metrics into a single posture score (you may need to adjust these weights)
131
- posture_score = (
132
- (1 - abs(shoulder_angle - hip_angle) / 90) * 0.3 +
133
- (1 - abs(hip_angle - knee_angle) / 90) * 0.2 +
134
- (1 - shoulder_hip_alignment) * 0.1 +
135
- (1 - hip_knee_alignment) * 0.1 +
136
- (1 - abs(head_tilt - 90) / 90) * 0.15 +
137
- (1 - head_position) * 0.15
138
- )
139
-
140
- return posture_score, results.pose_landmarks
141
-
142
- def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
143
- os.makedirs(output_folder, exist_ok=True)
144
- clip = VideoFileClip(video_path)
145
- original_fps = clip.fps
146
- duration = clip.duration
147
- total_frames = int(duration * original_fps)
148
- step = max(1, original_fps / desired_fps)
149
- total_frames_to_extract = int(total_frames / step)
150
-
151
- frame_count = 0
152
- for t in np.arange(0, duration, step / original_fps):
153
- frame = clip.get_frame(t)
154
- img = Image.fromarray(frame)
155
- img.save(os.path.join(output_folder, f"frame_{frame_count:04d}.jpg"))
156
- frame_count += 1
157
- if progress_callback:
158
- progress = min(100, (frame_count / total_frames_to_extract) * 100)
159
- progress_callback(progress, f"Extracting frame {frame_count} of {total_frames_to_extract}")
160
- if frame_count >= total_frames_to_extract:
161
- break
162
- clip.close()
163
- return frame_count, original_fps
164
-
165
- def is_frontal_face(landmarks, threshold=40):
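- # Consider the face frontal when the angle between the nose-to-left-chin and nose-to-right-chin vectors is within `threshold` degrees of 180.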
166
- nose_tip = landmarks[4]
167
- left_chin = landmarks[234]
168
- right_chin = landmarks[454]
169
- nose_to_left = [left_chin.x - nose_tip.x, left_chin.y - nose_tip.y]
170
- nose_to_right = [right_chin.x - nose_tip.x, right_chin.y - nose_tip.y]
171
- dot_product = nose_to_left[0] * nose_to_right[0] + nose_to_left[1] * nose_to_right[1]
172
- magnitude_left = math.sqrt(nose_to_left[0] ** 2 + nose_to_left[1] ** 2)
173
- magnitude_right = math.sqrt(nose_to_right[0] ** 2 + nose_to_right[1] ** 2)
174
- cos_angle = dot_product / (magnitude_left * magnitude_right)
175
- angle = math.acos(max(-1.0, min(1.0, cos_angle)))  # clamp to avoid a math domain error from floating-point rounding
176
- angle_degrees = math.degrees(angle)
177
- return abs(180 - angle_degrees) < threshold
178
-
179
-
180
- def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
181
- embeddings_by_frame = {}
182
- emotions_by_frame = {}
183
- posture_scores_by_frame = {}
184
- posture_landmarks_by_frame = {}
185
- aligned_face_paths = []
186
- frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
187
-
188
- for i in range(0, len(frame_files), batch_size):
189
- batch_files = frame_files[i:i + batch_size]
190
- batch_frames = []
191
- batch_nums = []
192
-
193
- for frame_file in batch_files:
194
- frame_num = int(frame_file.split('_')[1].split('.')[0])
195
- frame_path = os.path.join(frames_folder, frame_file)
196
- frame = cv2.imread(frame_path)
197
- if frame is not None:
198
- batch_frames.append(frame)
199
- batch_nums.append(frame_num)
200
-
201
- if batch_frames:
202
- batch_boxes, batch_probs = mtcnn.detect(batch_frames)
203
-
204
- for j, (frame, frame_num, boxes, probs) in enumerate(
205
- zip(batch_frames, batch_nums, batch_boxes, batch_probs)):
206
-
207
- # Calculate posture score for the full frame
208
- posture_score, posture_landmarks = calculate_posture_score(frame)
209
- posture_scores_by_frame[frame_num] = posture_score
210
- posture_landmarks_by_frame[frame_num] = posture_landmarks
211
-
212
- if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
213
- x1, y1, x2, y2 = [int(b) for b in boxes[0]]
214
- face = frame[y1:y2, x1:x2]
215
- if face.size > 0:
216
- results = face_mesh.process(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
217
- if results.multi_face_landmarks and is_frontal_face(results.multi_face_landmarks[0].landmark):
218
-
219
- #aligned_face = alignFace(face)
220
- aligned_face = face
221
-
222
- if aligned_face is not None:
223
- aligned_face_resized = cv2.resize(aligned_face, (160, 160))
224
- output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
225
- cv2.imwrite(output_path, aligned_face_resized)
226
- aligned_face_paths.append(output_path)
227
- embedding = get_face_embedding(aligned_face_resized)
228
- embeddings_by_frame[frame_num] = embedding
229
-
230
- progress((i + len(batch_files)) / len(frame_files),
231
- f"Processing frames {i + 1} to {min(i + len(batch_files), len(frame_files))} of {len(frame_files)}")
232
-
233
- return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths
234
-
235
-
236
- def cluster_faces(embeddings):
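- # Cluster face embeddings with DBSCAN on cosine distance; if every sample is labelled noise (-1), fall back to a single cluster.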
237
- if len(embeddings) < 2:
238
- print("Not enough faces for clustering. Assigning all to one cluster.")
239
- return np.zeros(len(embeddings), dtype=int)
240
-
241
- X = np.stack(embeddings)
242
- dbscan = DBSCAN(eps=0.5, min_samples=5, metric='cosine')
243
- clusters = dbscan.fit_predict(X)
244
-
245
- if np.all(clusters == -1):
246
- print("DBSCAN assigned all to noise. Considering as one cluster.")
247
- return np.zeros(len(embeddings), dtype=int)
248
-
249
- return clusters
250
-
251
- def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
252
- for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
253
- person_folder = os.path.join(organized_faces_folder, f"person_{cluster}")
254
- os.makedirs(person_folder, exist_ok=True)
255
- src = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
256
- dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
257
- shutil.copy(src, dst)
258
-
259
- def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
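- # Keep only the largest cluster (assumed to be the target person), save its embeddings to face_embeddings.npy, and build a DataFrame of frames, timecodes and raw embedding values (despite the name, no CSV is written; the DataFrame is returned).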
260
- person_data = {}
261
-
262
- for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
263
- if cluster not in person_data:
264
- person_data[cluster] = []
265
- person_data[cluster].append((frame_num, embedding))
266
-
267
- largest_cluster = max(person_data, key=lambda k: len(person_data[k]))
268
-
269
- data = person_data[largest_cluster]
270
- data.sort(key=lambda x: x[0])
271
- frames, embeddings = zip(*data)
272
-
273
- embeddings_array = np.array(embeddings)
274
- np.save(os.path.join(output_folder, 'face_embeddings.npy'), embeddings_array)
275
-
276
- total_frames = max(frames)
277
- timecodes = [frame_to_timecode(frame, total_frames, video_duration) for frame in frames]
278
-
279
- df_data = {
280
- 'Frame': frames,
281
- 'Timecode': timecodes,
282
- 'Embedding_Index': range(len(embeddings))
283
- }
284
-
285
- for i in range(len(embeddings[0])):
286
- df_data[f'Raw_Embedding_{i}'] = [embedding[i] for embedding in embeddings]
287
-
288
- df = pd.DataFrame(df_data)
289
-
290
- return df, largest_cluster
291
-
292
- class Autoencoder(nn.Module):
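- # Fully connected autoencoder (input -> 256 -> 128 -> 64 -> 32 and back); the forward pass flattens the (batch, sequence, features) input so each time step is reconstructed independently, and reconstruction error is later used as the anomaly score.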
293
- def __init__(self, input_size):
294
- super(Autoencoder, self).__init__()
295
- self.encoder = nn.Sequential(
296
- nn.Linear(input_size, 256),
297
- nn.ReLU(),
298
- nn.Linear(256, 128),
299
- nn.ReLU(),
300
- nn.Linear(128, 64),
301
- nn.ReLU(),
302
- nn.Linear(64, 32)
303
- )
304
- self.decoder = nn.Sequential(
305
- nn.Linear(32, 64),
306
- nn.ReLU(),
307
- nn.Linear(64, 128),
308
- nn.ReLU(),
309
- nn.Linear(128, 256),
310
- nn.ReLU(),
311
- nn.Linear(256, input_size)
312
- )
313
-
314
- def forward(self, x):
315
- batch_size, seq_len, _ = x.size()
316
- x = x.view(batch_size * seq_len, -1)
317
- encoded = self.encoder(x)
318
- decoded = self.decoder(encoded)
319
- return decoded.view(batch_size, seq_len, -1)
320
-
321
- def determine_anomalies(mse_values, threshold):
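- # Flag values that exceed mean + threshold * std of the MSE series as anomalies.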
322
- mean = np.mean(mse_values)
323
- std = np.std(mse_values)
324
- anomalies = mse_values > (mean + threshold * std)
325
- return anomalies
326
-
327
- def anomaly_detection(X_embeddings, X_posture, epochs=200, batch_size=8, patience=5):
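- # Train one autoencoder on facial embeddings and another on min-max scaled posture scores, then return each model's per-frame reconstruction MSE; `patience` is accepted but unused (no early stopping).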
328
- # Normalize posture
329
- scaler_posture = MinMaxScaler()
330
- X_posture_scaled = scaler_posture.fit_transform(X_posture.reshape(-1, 1))
331
-
332
- # Process facial embeddings
333
- X_embeddings = torch.FloatTensor(X_embeddings).to(device)
334
- if X_embeddings.dim() == 2:
335
- X_embeddings = X_embeddings.unsqueeze(0)
336
-
337
- # Process posture
338
- X_posture_scaled = torch.FloatTensor(X_posture_scaled).to(device)
339
- if X_posture_scaled.dim() == 2:
340
- X_posture_scaled = X_posture_scaled.unsqueeze(0)
341
-
342
- model_embeddings = Autoencoder(input_size=X_embeddings.shape[2]).to(device)
343
- model_posture = Autoencoder(input_size=X_posture_scaled.shape[2]).to(device)
344
-
345
- criterion = nn.MSELoss()
346
- optimizer_embeddings = optim.Adam(model_embeddings.parameters())
347
- optimizer_posture = optim.Adam(model_posture.parameters())
348
-
349
- # Train models
350
- for epoch in range(epochs):
351
- for model, optimizer, X in [(model_embeddings, optimizer_embeddings, X_embeddings),
352
- (model_posture, optimizer_posture, X_posture_scaled)]:
353
- model.train()
354
- optimizer.zero_grad()
355
- output = model(X)
356
- loss = criterion(output, X)
357
- loss.backward()
358
- optimizer.step()
359
-
360
- # Compute MSE for embeddings and posture
361
- model_embeddings.eval()
362
- model_posture.eval()
363
- with torch.no_grad():
364
- reconstructed_embeddings = model_embeddings(X_embeddings).cpu().numpy()
365
- reconstructed_posture = model_posture(X_posture_scaled).cpu().numpy()
366
-
367
- mse_embeddings = np.mean(np.power(X_embeddings.cpu().numpy() - reconstructed_embeddings, 2), axis=2).squeeze()
368
- mse_posture = np.mean(np.power(X_posture_scaled.cpu().numpy() - reconstructed_posture, 2), axis=2).squeeze()
369
-
370
- return mse_embeddings, mse_posture
371
-
372
- def plot_mse(df, mse_values, title, color='navy', time_threshold=3, anomaly_threshold=4):
373
- # Create the figure once; the extra plt.figure() call was never used or closed and leaked a figure per call.
374
- fig, ax = plt.subplots(figsize=(16, 8), dpi=400)
375
-
376
- if 'Seconds' not in df.columns:
377
- df['Seconds'] = df['Timecode'].apply(
378
- lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
379
-
380
- # Ensure df and mse_values have the same length and remove NaN values
381
- min_length = min(len(df), len(mse_values))
382
- df = df.iloc[:min_length]
383
- mse_values = mse_values[:min_length]
384
-
385
- # Remove NaN values
386
- mask = ~np.isnan(mse_values)
387
- df = df[mask]
388
- mse_values = mse_values[mask]
389
-
390
- mean = pd.Series(mse_values).rolling(window=10).mean()
391
- std = pd.Series(mse_values).rolling(window=10).std()
392
- median = np.median(mse_values)
393
-
394
- ax.scatter(df['Seconds'], mse_values, color=color, alpha=0.3, s=5)
395
- ax.plot(df['Seconds'], mean, color=color, linewidth=0.5)
396
- ax.fill_between(df['Seconds'], mean - std, mean + std, color=color, alpha=0.1)
397
-
398
- # Add median line
399
- ax.axhline(y=median, color='black', linestyle='--', label='Median Baseline')
400
-
401
- # Add threshold line
402
- threshold = np.mean(mse_values) + anomaly_threshold * np.std(mse_values)
403
- ax.axhline(y=threshold, color='red', linestyle='--', label=f'Threshold: {anomaly_threshold:.1f}')
404
- ax.text(ax.get_xlim()[1], threshold, f'Threshold: {anomaly_threshold:.1f}', verticalalignment='center', horizontalalignment='left', color='red')
405
-
406
- anomalies = determine_anomalies(mse_values, anomaly_threshold)
407
- anomaly_frames = df['Frame'].iloc[anomalies].tolist()
408
-
409
- ax.scatter(df['Seconds'].iloc[anomalies], mse_values[anomalies], color='red', s=20, zorder=5)
410
-
411
- anomaly_data = list(zip(df['Timecode'].iloc[anomalies],
412
- df['Seconds'].iloc[anomalies],
413
- mse_values[anomalies]))
414
- anomaly_data.sort(key=lambda x: x[1])
415
-
416
- grouped_anomalies = []
417
- current_group = []
418
- for timecode, sec, mse in anomaly_data:
419
- if not current_group or sec - current_group[-1][1] <= time_threshold:
420
- current_group.append((timecode, sec, mse))
421
- else:
422
- grouped_anomalies.append(current_group)
423
- current_group = [(timecode, sec, mse)]
424
- if current_group:
425
- grouped_anomalies.append(current_group)
426
-
427
- for group in grouped_anomalies:
428
- start_sec = group[0][1]
429
- end_sec = group[-1][1]
430
- rect = Rectangle((start_sec, ax.get_ylim()[0]), end_sec - start_sec, ax.get_ylim()[1] - ax.get_ylim()[0],
431
- facecolor='red', alpha=0.2, zorder=1)
432
- ax.add_patch(rect)
433
-
434
- for group in grouped_anomalies:
435
- highest_mse_anomaly = max(group, key=lambda x: x[2])
436
- timecode, sec, mse = highest_mse_anomaly
437
- ax.annotate(timecode, (sec, mse), textcoords="offset points", xytext=(0, 10),
438
- ha='center', fontsize=6, color='red')
439
-
440
- max_seconds = df['Seconds'].max()
441
- num_ticks = 100
442
- tick_locations = np.linspace(0, max_seconds, num_ticks)
443
- tick_labels = [seconds_to_timecode(int(s)) for s in tick_locations]
444
-
445
- ax.set_xticks(tick_locations)
446
- ax.set_xticklabels(tick_labels, rotation=90, ha='center', fontsize=6)
447
-
448
- ax.set_xlabel('Timecode')
449
- ax.set_ylabel('Mean Squared Error')
450
- ax.set_title(title)
451
-
452
- ax.grid(True, linestyle='--', alpha=0.7)
453
- ax.legend()
454
- plt.tight_layout()
455
- plt.close()
456
- return fig, anomaly_frames
457
-
458
- def plot_mse_histogram(mse_values, title, anomaly_threshold, color='blue'):
459
- # Single figure; the redundant plt.figure() call leaked an unused figure.
460
- fig, ax = plt.subplots(figsize=(16, 4), dpi=400)
461
-
462
- ax.hist(mse_values, bins=100, edgecolor='black', color=color, alpha=0.7)
463
- ax.set_xlabel('Mean Squared Error')
464
- ax.set_ylabel('Number of Samples')
465
- ax.set_title(title)
466
-
467
- mean = np.mean(mse_values)
468
- std = np.std(mse_values)
469
- threshold = mean + anomaly_threshold * std
470
-
471
- ax.axvline(x=threshold, color='red', linestyle='--', linewidth=2)
472
-
473
- # Move annotation to the bottom and away from the line
474
- ax.annotate(f'Threshold: {anomaly_threshold:.1f}',
475
- xy=(threshold, ax.get_ylim()[0]),
476
- xytext=(0, -20),
477
- textcoords='offset points',
478
- ha='center', va='top',
479
- bbox=dict(boxstyle='round,pad=0.5', fc='white', ec='none', alpha=0.7),
480
- color='red')
481
-
482
- plt.tight_layout()
483
- plt.close()
484
- return fig
485
-
486
-
487
- def plot_posture(df, posture_scores, color='blue', anomaly_threshold=4):
488
- # Single figure; the redundant plt.figure() call leaked an unused figure.
489
- fig, ax = plt.subplots(figsize=(16, 8), dpi=400)
490
-
491
- df['Seconds'] = df['Timecode'].apply(
492
- lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
493
-
494
- posture_data = [(frame, score) for frame, score in posture_scores.items() if score is not None]
495
- posture_frames, posture_scores = zip(*posture_data)
496
-
497
- # Create a new dataframe for posture data
498
- posture_df = pd.DataFrame({'Frame': posture_frames, 'Score': posture_scores})
499
- posture_df = posture_df.merge(df[['Frame', 'Seconds']], on='Frame', how='inner')
500
-
501
- ax.scatter(posture_df['Seconds'], posture_df['Score'], color=color, alpha=0.3, s=5)
502
- mean = posture_df['Score'].rolling(window=10).mean()
503
- ax.plot(posture_df['Seconds'], mean, color=color, linewidth=0.5)
504
-
505
- ax.set_xlabel('Timecode')
506
- ax.set_ylabel('Posture Score')
507
- ax.set_title("Body Posture Over Time")
508
-
509
- ax.grid(True, linestyle='--', alpha=0.7)
510
-
511
- max_seconds = df['Seconds'].max()
512
- num_ticks = 80
513
- tick_locations = np.linspace(0, max_seconds, num_ticks)
514
- tick_labels = [seconds_to_timecode(int(s)) for s in tick_locations]
515
-
516
- ax.set_xticks(tick_locations)
517
- ax.set_xticklabels(tick_labels, rotation=90, ha='center', fontsize=6)
518
-
519
- plt.tight_layout()
520
- plt.close()
521
- return fig
522
-
523
-
524
- def plot_mse_heatmap(mse_values, title, df):
525
- # Single figure; the redundant plt.figure() call leaked an unused figure.
526
- fig, ax = plt.subplots(figsize=(20, 5), dpi=400)
527
-
528
- # Reshape MSE values to 2D array for heatmap
529
- mse_2d = mse_values.reshape(1, -1)
530
-
531
- # Create heatmap
532
- sns.heatmap(mse_2d, cmap='YlOrRd', cbar_kws={'label': 'MSE'}, ax=ax)
533
-
534
- # Set x-axis ticks to timecodes
535
- num_ticks = 60
536
- tick_locations = np.linspace(0, len(mse_values) - 1, num_ticks).astype(int)
537
- tick_labels = [df['Timecode'].iloc[i] for i in tick_locations]
538
-
539
- ax.set_xticks(tick_locations)
540
- ax.set_xticklabels(tick_labels, rotation=90, ha='center', va='top') # Adjusted rotation and alignment
541
-
542
- ax.set_title(title)
543
-
544
- # Remove y-axis labels
545
- ax.set_yticks([])
546
-
547
- plt.tight_layout() # Ensure all elements fit within the figure
548
- plt.close()
549
- return fig
550
-
551
- def draw_pose_landmarks(frame, landmarks):
552
- annotated_frame = frame.copy()
553
- # Include relevant landmarks for head position and body
554
- body_landmarks = [
555
- mp_pose.PoseLandmark.NOSE,
556
- mp_pose.PoseLandmark.LEFT_SHOULDER,
557
- mp_pose.PoseLandmark.RIGHT_SHOULDER,
558
- mp_pose.PoseLandmark.LEFT_EAR,
559
- mp_pose.PoseLandmark.RIGHT_EAR,
560
- mp_pose.PoseLandmark.LEFT_ELBOW,
561
- mp_pose.PoseLandmark.RIGHT_ELBOW,
562
- mp_pose.PoseLandmark.LEFT_WRIST,
563
- mp_pose.PoseLandmark.RIGHT_WRIST,
564
- mp_pose.PoseLandmark.LEFT_HIP,
565
- mp_pose.PoseLandmark.RIGHT_HIP,
566
- mp_pose.PoseLandmark.LEFT_KNEE,
567
- mp_pose.PoseLandmark.RIGHT_KNEE,
568
- mp_pose.PoseLandmark.LEFT_ANKLE,
569
- mp_pose.PoseLandmark.RIGHT_ANKLE
570
- ]
571
-
572
- # Connections for head position and body
573
- body_connections = [
574
- (mp_pose.PoseLandmark.LEFT_EAR, mp_pose.PoseLandmark.LEFT_SHOULDER),
575
- (mp_pose.PoseLandmark.RIGHT_EAR, mp_pose.PoseLandmark.RIGHT_SHOULDER),
576
- (mp_pose.PoseLandmark.NOSE, mp_pose.PoseLandmark.LEFT_SHOULDER),
577
- (mp_pose.PoseLandmark.NOSE, mp_pose.PoseLandmark.RIGHT_SHOULDER),
578
- (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.RIGHT_SHOULDER),
579
- (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.LEFT_ELBOW),
580
- (mp_pose.PoseLandmark.RIGHT_SHOULDER, mp_pose.PoseLandmark.RIGHT_ELBOW),
581
- (mp_pose.PoseLandmark.LEFT_ELBOW, mp_pose.PoseLandmark.LEFT_WRIST),
582
- (mp_pose.PoseLandmark.RIGHT_ELBOW, mp_pose.PoseLandmark.RIGHT_WRIST),
583
- (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.LEFT_HIP),
584
- (mp_pose.PoseLandmark.RIGHT_SHOULDER, mp_pose.PoseLandmark.RIGHT_HIP),
585
- (mp_pose.PoseLandmark.LEFT_HIP, mp_pose.PoseLandmark.RIGHT_HIP),
586
- (mp_pose.PoseLandmark.LEFT_HIP, mp_pose.PoseLandmark.LEFT_KNEE),
587
- (mp_pose.PoseLandmark.RIGHT_HIP, mp_pose.PoseLandmark.RIGHT_KNEE),
588
- (mp_pose.PoseLandmark.LEFT_KNEE, mp_pose.PoseLandmark.LEFT_ANKLE),
589
- (mp_pose.PoseLandmark.RIGHT_KNEE, mp_pose.PoseLandmark.RIGHT_ANKLE)
590
- ]
591
-
592
- # Draw landmarks
593
- for landmark in body_landmarks:
594
- if landmark.value < len(landmarks.landmark):  # the original membership test compared an enum against landmark objects and was never True
595
- lm = landmarks.landmark[landmark]
596
- h, w, _ = annotated_frame.shape
597
- cx, cy = int(lm.x * w), int(lm.y * h)
598
- cv2.circle(annotated_frame, (cx, cy), 5, (245, 117, 66), -1)
599
-
600
- # Draw connections
601
- for connection in body_connections:
602
- start_lm = landmarks.landmark[connection[0]]
603
- end_lm = landmarks.landmark[connection[1]]
604
- h, w, _ = annotated_frame.shape
605
- start_point = (int(start_lm.x * w), int(start_lm.y * h))
606
- end_point = (int(end_lm.x * w), int(end_lm.y * h))
607
- cv2.line(annotated_frame, start_point, end_point, (245, 66, 230), 2)
608
-
609
- # Highlight head tilt
610
- left_ear = landmarks.landmark[mp_pose.PoseLandmark.LEFT_EAR]
611
- right_ear = landmarks.landmark[mp_pose.PoseLandmark.RIGHT_EAR]
612
- nose = landmarks.landmark[mp_pose.PoseLandmark.NOSE]
613
-
614
- h, w, _ = annotated_frame.shape
615
- left_ear_point = (int(left_ear.x * w), int(left_ear.y * h))
616
- right_ear_point = (int(right_ear.x * w), int(right_ear.y * h))
617
- nose_point = (int(nose.x * w), int(nose.y * h))
618
-
619
- # Draw a line between ears to show head tilt
620
- cv2.line(annotated_frame, left_ear_point, right_ear_point, (0, 255, 0), 2)
621
-
622
- # Draw a line from nose to the midpoint between shoulders to show head forward/backward tilt
623
- left_shoulder = landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER]
624
- right_shoulder = landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER]
625
- shoulder_mid_x = (left_shoulder.x + right_shoulder.x) / 2
626
- shoulder_mid_y = (left_shoulder.y + right_shoulder.y) / 2
627
- shoulder_mid_point = (int(shoulder_mid_x * w), int(shoulder_mid_y * h))
628
- cv2.line(annotated_frame, nose_point, shoulder_mid_point, (0, 255, 0), 2)
629
-
630
- return annotated_frame
631
-
632
- def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=500):
633
- face_samples = {"most_frequent": [], "others": []}
634
- for cluster_folder in sorted(os.listdir(organized_faces_folder)):
635
- if cluster_folder.startswith("person_"):
636
- person_folder = os.path.join(organized_faces_folder, cluster_folder)
637
- face_files = sorted([f for f in os.listdir(person_folder) if f.endswith('.jpg')])
638
- if face_files:
639
- cluster_id = int(cluster_folder.split('_')[1])
640
- if cluster_id == largest_cluster:
641
- for i, sample in enumerate(face_files[:max_samples]):
642
- face_path = os.path.join(person_folder, sample)
643
- output_path = os.path.join(output_folder, f"face_sample_most_frequent_{i:04d}.jpg")
644
- face_img = cv2.imread(face_path)
645
- if face_img is not None:
646
- small_face = cv2.resize(face_img, (160, 160))
647
- cv2.imwrite(output_path, small_face)
648
- face_samples["most_frequent"].append(output_path)
649
- if len(face_samples["most_frequent"]) >= max_samples:
650
- break
651
- else:
652
- remaining_samples = max_samples - len(face_samples["others"])
653
- if remaining_samples > 0:
654
- for i, sample in enumerate(face_files[:remaining_samples]):
655
- face_path = os.path.join(person_folder, sample)
656
- output_path = os.path.join(output_folder, f"face_sample_other_{cluster_id:02d}_{i:04d}.jpg")
657
- face_img = cv2.imread(face_path)
658
- if face_img is not None:
659
- small_face = cv2.resize(face_img, (160, 160))
660
- cv2.imwrite(output_path, small_face)
661
- face_samples["others"].append(output_path)
662
- if len(face_samples["others"]) >= max_samples:
663
- break
664
- return face_samples
665
-
666
-
667
- def process_video(video_path, anomaly_threshold, desired_fps, progress=gr.Progress()):
668
- start_time = time.time()
669
- output_folder = "output"
670
- os.makedirs(output_folder, exist_ok=True)
671
- batch_size = 16
672
-
673
- GRAPH_COLORS = {
674
- 'facial_embeddings': 'navy',
675
- 'body_posture': 'purple'
676
- }
677
-
678
- with tempfile.TemporaryDirectory() as temp_dir:
679
- aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
680
- organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
681
- os.makedirs(aligned_faces_folder, exist_ok=True)
682
- os.makedirs(organized_faces_folder, exist_ok=True)
683
-
684
- clip = VideoFileClip(video_path)
685
- video_duration = clip.duration
686
- clip.close()
687
-
688
- progress(0, "Starting frame extraction")
689
- frames_folder = os.path.join(temp_dir, 'extracted_frames')
690
-
691
- def extraction_progress(percent, message):
692
- progress(percent / 100, message)
693
-
694
- frame_count, original_fps = extract_frames(video_path, frames_folder, desired_fps, extraction_progress)
695
-
696
- progress(1, "Frame extraction complete")
697
- progress(0.3, "Processing frames")
698
- embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths = process_frames(
699
- frames_folder, aligned_faces_folder,
700
- frame_count,
701
- progress, batch_size)
702
-
703
- if not aligned_face_paths:
704
- raise ValueError("No faces were extracted from the video.")
705
-
706
- progress(0.6, "Clustering faces")
707
- embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
708
- clusters = cluster_faces(embeddings)
709
- num_clusters = len(set(clusters))
710
-
711
- progress(0.7, "Organizing faces")
712
- organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
713
-
714
- progress(0.8, "Saving person data")
715
- df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps,
716
- original_fps, temp_dir, video_duration)
717
-
718
- # Add 'Seconds' column to df
719
- df['Seconds'] = df['Timecode'].apply(
720
- lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
721
-
722
- progress(0.85, "Getting face samples")
723
- face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
724
-
725
- progress(0.9, "Performing anomaly detection")
726
- embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
727
-
728
- X_embeddings = df[embedding_columns].values
729
-
730
- try:
731
- X_posture = [posture_scores_by_frame.get(frame) for frame in df['Frame']]
732
- X_posture = np.array([s for s in X_posture if s is not None], dtype=float).reshape(-1, 1)  # drop frames without a posture score, then reshape
733
-
734
- # Ensure X_posture is not empty
735
- if len(X_posture) == 0:
736
- raise ValueError("No valid posture data found")
737
-
738
- mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture, batch_size=batch_size)
739
-
740
- progress(0.95, "Generating plots")
741
- mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
742
- color=GRAPH_COLORS['facial_embeddings'],
743
- anomaly_threshold=anomaly_threshold)
744
-
745
- mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
746
- anomaly_threshold, color=GRAPH_COLORS['facial_embeddings'])
747
-
748
- mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
749
- color=GRAPH_COLORS['body_posture'],
750
- anomaly_threshold=anomaly_threshold)
751
-
752
- mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
753
- anomaly_threshold, color=GRAPH_COLORS['body_posture'])
754
-
755
- mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
756
- mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
757
-
758
- except Exception as e:
759
- print(f"Error details: {str(e)}")
760
- import traceback
761
- traceback.print_exc()
762
- return (f"Error in video processing: {str(e)}",) + (None,) * 14
763
-
764
- progress(1.0, "Preparing results")
765
- results = f"Number of persons detected: {num_clusters}\n\n"
766
- results += "Breakdown:\n"
767
- for cluster_id in range(num_clusters):
768
- face_count = len([c for c in clusters if c == cluster_id])
769
- results += f"Person {cluster_id + 1}: {face_count} face frames\n"
770
-
771
- end_time = time.time()
772
- execution_time = end_time - start_time
773
-
774
- def add_timecode_to_image(image, timecode):
775
- img_pil = Image.fromarray(image)
776
- draw = ImageDraw.Draw(img_pil)
777
- try:
- font = ImageFont.truetype("arial.ttf", 15)
- except OSError:
- font = ImageFont.load_default()  # arial.ttf is often unavailable on Linux hosts
778
- draw.text((10, 10), timecode, (255, 0, 0), font=font)
779
- return np.array(img_pil)
780
-
781
- # In the process_video function, update the anomaly frame processing:
782
- anomaly_faces_embeddings = []
783
- for frame in anomaly_frames_embeddings:
784
- face_path = os.path.join(aligned_faces_folder, f"frame_{frame}_face.jpg")
785
- if os.path.exists(face_path):
786
- face_img = cv2.imread(face_path)
787
- if face_img is not None:
788
- face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
789
- timecode = df[df['Frame'] == frame]['Timecode'].iloc[0]
790
- face_img_with_timecode = add_timecode_to_image(face_img, timecode)
791
- anomaly_faces_embeddings.append(face_img_with_timecode)
792
-
793
- anomaly_frames_posture_images = []
794
- for frame in anomaly_frames_posture:
795
- frame_path = os.path.join(frames_folder, f"frame_{frame:04d}.jpg")
796
- if os.path.exists(frame_path):
797
- frame_img = cv2.imread(frame_path)
798
- if frame_img is not None:
799
- frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
800
- pose_results = pose.process(frame_img)
801
- if pose_results.pose_landmarks:
802
- frame_img = draw_pose_landmarks(frame_img, pose_results.pose_landmarks)
803
- timecode = df[df['Frame'] == frame]['Timecode'].iloc[0]
804
- frame_img_with_timecode = add_timecode_to_image(frame_img, timecode)
805
- anomaly_frames_posture_images.append(frame_img_with_timecode)
806
-
807
- return (
808
- execution_time,
809
- results,
810
- df,
811
- mse_embeddings,
812
- mse_posture,
813
- mse_plot_embeddings,
814
- mse_histogram_embeddings,
815
- mse_plot_posture,
816
- mse_histogram_posture,
817
- mse_heatmap_embeddings,
818
- mse_heatmap_posture,
819
- face_samples["most_frequent"],
820
- face_samples["others"],
821
- anomaly_faces_embeddings,
822
- anomaly_frames_posture_images,
823
- aligned_faces_folder,
824
- frames_folder
825
- )
826
-
827
-
828
- with gr.Blocks() as iface:
829
- gr.Markdown("""
830
- # Facial Expression and Body Language Anomaly Detection
831
-
832
- This application analyzes videos to detect anomalies in facial features and body language.
833
- It processes the video frames to extract facial embeddings and body posture,
834
- then uses machine learning techniques to identify unusual patterns or deviations from the norm.
835
-
836
- For more information, visit: [https://github.com/reab5555/Facial-Expression-Anomaly-Detection](https://github.com/reab5555/Facial-Expression-Anomaly-Detection)
837
- """)
838
-
839
- with gr.Row():
840
- video_input = gr.Video()
841
-
842
- anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3, label="Anomaly Detection Threshold")
843
- process_btn = gr.Button("Process Video")
844
- progress_bar = gr.Progress()
845
- execution_time = gr.Number(label="Execution Time (seconds)")
846
-
847
- with gr.Group(visible=False) as results_group:
848
- results_text = gr.TextArea(label="Anomaly Detection Results", lines=4)
849
-
850
- with gr.Tab("Facial Features"):
851
- mse_features_plot = gr.Plot(label="MSE: Facial Features")
852
- mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
853
- mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
854
- anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
855
-
856
- with gr.Tab("Body Posture"):
857
- mse_posture_plot = gr.Plot(label="MSE: Body Posture")
858
- mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
859
- mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
860
- anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
861
-
862
- with gr.Tab("Face Samples"):
863
- face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples (Target)", columns=6, rows=2, height="auto")
864
- face_samples_others = gr.Gallery(label="Other Persons Samples", columns=6, rows=1, height="auto")
865
-
866
- # Hidden components to store intermediate results
867
- df_store = gr.State()
868
- mse_features_store = gr.State()
869
- mse_posture_store = gr.State()
870
- aligned_faces_folder_store = gr.State()
871
- frames_folder_store = gr.State()
872
- mse_heatmap_embeddings_store = gr.State()
873
- mse_heatmap_posture_store = gr.State()
874
-
875
- def process_and_show_completion(video_input_path, anomaly_threshold_input):
876
- try:
877
- print("Starting video processing...")
878
- results = process_video(video_input_path, anomaly_threshold_input, FIXED_FPS, progress=progress_bar)
879
- print("Video processing completed.")
880
-
881
- if isinstance(results[0], str) and results[0].startswith("Error"):
882
- print(f"Error occurred: {results[0]}")
883
- return [results[0]] + [None] * 18 # Update this line to match the number of outputs
884
-
885
- exec_time, results_summary, df, mse_embeddings, mse_posture, \
886
- mse_plot_embeddings, mse_histogram_embeddings, \
887
- mse_plot_posture, mse_histogram_posture, \
888
- mse_heatmap_embeddings, mse_heatmap_posture, \
889
- face_samples_frequent, face_samples_other, \
890
- anomaly_faces_embeddings, anomaly_frames_posture_images, \
891
- aligned_faces_folder, frames_folder = results
892
-
893
- # Convert numpy arrays to PIL Images for the galleries
894
- anomaly_faces_embeddings_pil = [Image.fromarray(face) for face in anomaly_faces_embeddings]
895
- anomaly_frames_posture_pil = [Image.fromarray(frame) for frame in anomaly_frames_posture_images]
896
-
897
- # Ensure face samples are in the correct format for Gradio
898
- face_samples_frequent = [Image.open(path) for path in face_samples_frequent]
899
- face_samples_other = [Image.open(path) for path in face_samples_other]
900
-
901
- output = [
902
- exec_time, results_summary,
903
- df, mse_embeddings, mse_posture,
904
- mse_plot_embeddings, mse_plot_posture,
905
- mse_histogram_embeddings, mse_histogram_posture,
906
- mse_heatmap_embeddings, mse_heatmap_posture,
907
- anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
908
- face_samples_frequent, face_samples_other,
909
- aligned_faces_folder, frames_folder,
910
- mse_embeddings, mse_posture
911
- ]
912
-
913
- return output
914
-
915
- except Exception as e:
916
- error_message = f"An error occurred: {str(e)}"
917
- print(error_message)
918
- import traceback
919
- traceback.print_exc()
920
- return [error_message] + [None] * 18
921
-
922
- process_btn.click(
923
- process_and_show_completion,
924
- inputs=[video_input, anomaly_threshold],
925
- outputs=[
926
- execution_time, results_text, df_store,
927
- mse_features_store, mse_posture_store,
928
- mse_features_plot, mse_posture_plot,
929
- mse_features_hist, mse_posture_hist,
930
- mse_features_heatmap, mse_posture_heatmap,
931
- anomaly_frames_features, anomaly_frames_posture,
932
- face_samples_most_frequent, face_samples_others,
933
- aligned_faces_folder_store, frames_folder_store,
934
- mse_heatmap_embeddings_store, mse_heatmap_posture_store
935
- ]
936
- ).then(
937
- lambda: gr.Group(visible=True),
938
- inputs=None,
939
- outputs=[results_group]
940
- )
941
-
942
- if __name__ == "__main__":
943
- iface.launch()