Delete app.py
Browse files
app.py
DELETED
@@ -1,943 +0,0 @@
|
|
1 |
-
import math
|
2 |
-
import os
|
3 |
-
import cv2
|
4 |
-
import numpy as np
|
5 |
-
import torch
|
6 |
-
import torch.nn as nn
|
7 |
-
import torch.optim as optim
|
8 |
-
from facenet_pytorch import InceptionResnetV1, MTCNN
|
9 |
-
import tensorflow as tf
|
10 |
-
import mediapipe as mp
|
11 |
-
from sklearn.cluster import DBSCAN
|
12 |
-
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
13 |
-
import pandas as pd
|
14 |
-
import matplotlib
|
15 |
-
import matplotlib.pyplot as plt
|
16 |
-
import seaborn as sns
|
17 |
-
from matplotlib.patches import Rectangle
|
18 |
-
from moviepy.editor import VideoFileClip
|
19 |
-
from PIL import Image, ImageDraw, ImageFont
|
20 |
-
import gradio as gr
|
21 |
-
import tempfile
|
22 |
-
import shutil
|
23 |
-
import time
|
24 |
-
|
25 |
-
|
26 |
-
# Use 400 dpi both for on-screen figures and for saved files.
matplotlib.rcParams.update({'figure.dpi': 400, 'savefig.dpi': 400})

# Shared models and other module-level state.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

FIXED_FPS = 5

# MTCNN face detector and the InceptionResnetV1 (vggface2 weights) embedding
# network, both placed on `device`.
mtcnn = MTCNN(
    keep_all=False,
    device=device,
    thresholds=[0.95, 0.95, 0.95],
    min_face_size=80,
)
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# MediaPipe face mesh, limited to a single face.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    min_detection_confidence=0.8,
)

# MediaPipe pose estimator plus its drawing helpers.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose = mp_pose.Pose(
    static_image_mode=False,
    min_detection_confidence=0.8,
    min_tracking_confidence=0.8,
)
|
43 |
-
|
44 |
-
def frame_to_timecode(frame_num, total_frames, duration):
    """Convert a frame index into an ``HH:MM:SS.mmm`` timecode.

    The position is interpolated linearly: the frame sits at
    ``frame_num / total_frames`` of ``duration`` seconds.

    Args:
        frame_num: Index of the frame.
        total_frames: Number of frames spanning ``duration``.
        duration: Length of the clip in seconds.

    Returns:
        The timecode string. Fractional milliseconds are truncated.
    """
    # A zero frame count used to raise ZeroDivisionError (for example when the
    # only known frame is frame 0); treat it as the start of the clip.
    if total_frames:
        total_seconds = (frame_num / total_frames) * duration
    else:
        total_seconds = 0.0
    hours = int(total_seconds // 3600)
    minutes = int((total_seconds % 3600) // 60)
    seconds = int(total_seconds % 60)
    milliseconds = int((total_seconds - int(total_seconds)) * 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
|
51 |
-
|
52 |
-
def seconds_to_timecode(seconds):
    """Format a number of seconds as ``HH:MM:SS``, dropping any fraction."""
    hh = int(seconds // 3600)
    mm = int(seconds % 3600 // 60)
    ss = int(seconds % 60)
    return "{:02d}:{:02d}:{:02d}".format(hh, mm, ss)
|
57 |
-
|
58 |
-
def timecode_to_seconds(timecode):
    """Convert an ``HH:MM:SS`` or ``HH:MM:SS.mmm`` timecode to seconds.

    Args:
        timecode: Colon-separated hours, minutes and seconds. The seconds
            field may carry a fractional part, as produced by
            ``frame_to_timecode``.

    Returns:
        An ``int`` when the seconds field is whole, otherwise a ``float``.

    Raises:
        ValueError: If the string does not have exactly three fields or a
            field is not numeric.
    """
    hours, minutes, seconds = timecode.split(':')
    whole = int(hours) * 3600 + int(minutes) * 60
    # The seconds field used to go through int(), which rejected the
    # millisecond suffix emitted elsewhere in this file.
    if '.' in seconds:
        return whole + float(seconds)
    return whole + int(seconds)
|
61 |
-
|
62 |
-
def get_face_embedding(face_img):
    """Run the module-level embedding ``model`` on one face crop.

    The image is moved to channel-first layout, given a batch axis, scaled
    from 0..255 to -1..1, and sent to ``device``. The model output is returned
    as a flat NumPy array.
    """
    pixels = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
    batch = ((pixels - 0.5) / 0.5).to(device)
    with torch.no_grad():
        features = model(batch)
    return features.cpu().numpy().flatten()
|
69 |
-
|
70 |
-
def alignFace(img):
    """Rotate a BGR face image so that the line between the eyes is level.

    Eye centres are estimated from six face-mesh landmarks per eye, and the
    image is rotated about its centre by the angle of the eye line.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2RGB`` before
            being passed to the face mesh).

    Returns:
        The rotated image with the same size as the input, or ``None`` when
        the face mesh finds no face.
    """
    img_raw = img.copy()
    results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
        return None
    landmarks = results.multi_face_landmarks[0].landmark
    height, width = img_raw.shape[:2]

    def eye_center(indices):
        # Landmark coordinates are normalised, so scale them to pixels before
        # measuring the angle. The previous code truncated the normalised
        # values to integers, which collapsed both centres to (0, 0) and made
        # the angle always zero.
        points = np.array([[landmarks[i].x * width, landmarks[i].y * height] for i in indices])
        return points.mean(axis=0)

    left_eye_center = eye_center((33, 160, 158, 144, 153, 145))
    right_eye_center = eye_center((362, 385, 387, 263, 373, 380))
    dY = right_eye_center[1] - left_eye_center[1]
    dX = right_eye_center[0] - left_eye_center[0]
    angle = float(np.degrees(np.arctan2(dY, dX)))

    # With OpenCV's rotation matrix, rotating by +angle maps the eye vector
    # onto the horizontal axis; rotating by -angle would double the tilt.
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1)
    return cv2.warpAffine(img_raw, rotation_matrix, (width, height))
|
94 |
-
|
95 |
-
def calculate_posture_score(frame):
    """Compute a single posture score for a BGR frame.

    The frame is run through the module-level ``pose`` estimator and several
    landmark-based measures (shoulder/hip/knee line angles, vertical spacing
    of shoulders, hips and knees, ear-line angle and nose height relative to
    the torso) are combined with fixed weights.

    Args:
        frame: BGR image.

    Returns:
        ``(posture_score, pose_landmarks)``, or ``(None, None)`` when no pose
        is detected or the torso has zero vertical extent, in which case the
        head-position ratio is undefined.
    """
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if not results.pose_landmarks:
        return None, None

    landmarks = results.pose_landmarks.landmark

    def point(name):
        return landmarks[mp_pose.PoseLandmark[name].value]

    def line_angle(left, right):
        # Absolute angle, in degrees, of the left-to-right line.
        return abs(math.degrees(math.atan2(right.y - left.y, right.x - left.x)))

    # Body landmarks
    left_shoulder, right_shoulder = point('LEFT_SHOULDER'), point('RIGHT_SHOULDER')
    left_hip, right_hip = point('LEFT_HIP'), point('RIGHT_HIP')
    left_knee, right_knee = point('LEFT_KNEE'), point('RIGHT_KNEE')
    # Head landmarks
    nose = point('NOSE')
    left_ear, right_ear = point('LEFT_EAR'), point('RIGHT_EAR')

    shoulder_angle = line_angle(left_shoulder, right_shoulder)
    hip_angle = line_angle(left_hip, right_hip)
    knee_angle = line_angle(left_knee, right_knee)
    head_tilt = line_angle(left_ear, right_ear)

    shoulder_mid_y = (left_shoulder.y + right_shoulder.y) / 2
    hip_mid_y = (left_hip.y + right_hip.y) / 2
    knee_mid_y = (left_knee.y + right_knee.y) / 2

    # Vertical alignment
    shoulder_hip_alignment = abs(shoulder_mid_y - hip_mid_y)
    hip_knee_alignment = abs(hip_mid_y - knee_mid_y)

    # Head position relative to the shoulders, normalised by torso height.
    torso_height = shoulder_mid_y - hip_mid_y
    if torso_height == 0:
        # Previously this raised ZeroDivisionError.
        return None, None
    head_position = abs((nose.y - shoulder_mid_y) / torso_height)

    # Combine metrics into a single posture score (weights may need tuning).
    posture_score = (
        (1 - abs(shoulder_angle - hip_angle) / 90) * 0.3 +
        (1 - abs(hip_angle - knee_angle) / 90) * 0.2 +
        (1 - shoulder_hip_alignment) * 0.1 +
        (1 - hip_knee_alignment) * 0.1 +
        (1 - abs(head_tilt - 90) / 90) * 0.15 +
        (1 - head_position) * 0.15
    )

    return posture_score, results.pose_landmarks
|
141 |
-
|
142 |
-
def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
    """Sample frames from a video and save them as numbered JPEG files.

    Frames are written to ``output_folder`` as ``frame_0000.jpg``,
    ``frame_0001.jpg``, ... at roughly ``desired_fps`` frames per second
    (never faster than the video's own frame rate).

    Args:
        video_path: Path of the video to read.
        output_folder: Directory for the JPEG files; created if missing.
        desired_fps: Target sampling rate.
        progress_callback: Optional ``callback(percent, message)`` invoked
            after each saved frame with a percentage capped at 100.

    Returns:
        ``(frame_count, original_fps)``: number of frames written and the
        frame rate reported by the clip.
    """
    os.makedirs(output_folder, exist_ok=True)
    clip = VideoFileClip(video_path)
    try:
        original_fps = clip.fps
        duration = clip.duration
        total_frames = int(duration * original_fps)
        step = max(1, original_fps / desired_fps)
        total_frames_to_extract = int(total_frames / step)

        frame_count = 0
        for t in np.arange(0, duration, step / original_fps):
            frame = clip.get_frame(t)
            frame_path = os.path.join(output_folder, f"frame_{frame_count:04d}.jpg")
            Image.fromarray(frame).save(frame_path)
            frame_count += 1
            if progress_callback:
                # Very short clips can round the expected count down to zero;
                # use at least 1 as the denominator to avoid dividing by zero.
                expected = max(1, total_frames_to_extract)
                progress = min(100, (frame_count / expected) * 100)
                progress_callback(progress, "Extracting frame")
            if frame_count >= total_frames_to_extract:
                break
    finally:
        # Release the reader even when decoding or saving fails.
        clip.close()
    return frame_count, original_fps
|
164 |
-
|
165 |
-
def is_frontal_face(landmarks, threshold=40):
    """Decide whether a face is roughly frontal from three landmarks.

    The angle at landmark 4 between the vectors to landmarks 234 and 454 is
    measured in the x/y plane; the face counts as frontal when that angle is
    within ``threshold`` degrees of 180.

    Args:
        landmarks: Indexable collection whose items expose ``x`` and ``y``.
        threshold: Allowed deviation from 180 degrees.

    Returns:
        ``True`` if the face is considered frontal, ``False`` otherwise,
        including when either vector has zero length.
    """
    nose_tip = landmarks[4]
    left_chin = landmarks[234]
    right_chin = landmarks[454]
    nose_to_left = (left_chin.x - nose_tip.x, left_chin.y - nose_tip.y)
    nose_to_right = (right_chin.x - nose_tip.x, right_chin.y - nose_tip.y)

    norm_product = math.hypot(*nose_to_left) * math.hypot(*nose_to_right)
    if norm_product == 0:
        # Coincident landmarks: the angle is undefined (this used to raise
        # ZeroDivisionError).
        return False

    dot_product = nose_to_left[0] * nose_to_right[0] + nose_to_left[1] * nose_to_right[1]
    # Rounding can push the ratio marginally outside [-1, 1], which would make
    # math.acos raise ValueError.
    cos_angle = max(-1.0, min(1.0, dot_product / norm_product))
    angle_degrees = math.degrees(math.acos(cos_angle))
    return abs(180 - angle_degrees) < threshold
|
178 |
-
|
179 |
-
|
180 |
-
def _crop_face(frame, box):
    """Return the part of ``frame`` inside ``box``, clamped to the frame bounds."""
    frame_height, frame_width = frame.shape[:2]
    x1, y1, x2, y2 = [int(b) for b in box]
    # Boxes may extend past the image; a negative start index would wrap
    # around and yield an empty or wrong crop.
    x1 = max(0, min(frame_width, x1))
    x2 = max(0, min(frame_width, x2))
    y1 = max(0, min(frame_height, y1))
    y2 = max(0, min(frame_height, y2))
    return frame[y1:y2, x1:x2]


def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
    """Detect faces and score posture for every extracted frame.

    Frames named ``frame_<n>.jpg`` are read from ``frames_folder`` in batches.
    Each frame gets a posture score; frames with a face detected at
    probability >= 0.99 that also passes the frontal check get a 160x160 crop
    saved to ``aligned_faces_folder`` and an embedding computed from it.

    Args:
        frames_folder: Directory holding the extracted frames.
        aligned_faces_folder: Directory that receives the face crops.
        frame_count: Accepted for interface compatibility; not used.
        progress: ``callback(fraction, message)`` called once per batch.
        batch_size: Number of frames passed to the face detector at once.

    Returns:
        ``(embeddings_by_frame, posture_scores_by_frame,
        posture_landmarks_by_frame, aligned_face_paths)``; the dictionaries
        are keyed by frame number.
    """
    embeddings_by_frame = {}
    posture_scores_by_frame = {}
    posture_landmarks_by_frame = {}
    aligned_face_paths = []
    frame_files = sorted(f for f in os.listdir(frames_folder) if f.endswith('.jpg'))

    for i in range(0, len(frame_files), batch_size):
        batch_files = frame_files[i:i + batch_size]
        batch_frames = []
        batch_nums = []

        for frame_file in batch_files:
            frame_num = int(frame_file.split('_')[1].split('.')[0])
            frame = cv2.imread(os.path.join(frames_folder, frame_file))
            if frame is not None:
                batch_frames.append(frame)
                batch_nums.append(frame_num)

        if batch_frames:
            batch_boxes, batch_probs = mtcnn.detect(batch_frames)

            for frame, frame_num, boxes, probs in zip(batch_frames, batch_nums, batch_boxes, batch_probs):
                # Posture is scored on the full frame, whether or not a face is found.
                posture_score, posture_landmarks = calculate_posture_score(frame)
                posture_scores_by_frame[frame_num] = posture_score
                posture_landmarks_by_frame[frame_num] = posture_landmarks

                has_confident_face = boxes is not None and len(boxes) > 0 and probs[0] >= 0.99
                if not has_confident_face:
                    continue

                face = _crop_face(frame, boxes[0])
                if face.size == 0:
                    continue

                results = face_mesh.process(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                if not (results.multi_face_landmarks
                        and is_frontal_face(results.multi_face_landmarks[0].landmark)):
                    continue

                # Rotation-based alignment is currently disabled; the raw crop is used.
                aligned_face_resized = cv2.resize(face, (160, 160))
                output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
                cv2.imwrite(output_path, aligned_face_resized)
                aligned_face_paths.append(output_path)
                embeddings_by_frame[frame_num] = get_face_embedding(aligned_face_resized)

        progress((i + len(batch_files)) / len(frame_files),
                 f"Processing frames {i + 1} to {min(i + len(batch_files), len(frame_files))} of {len(frame_files)}")

    return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths
|
234 |
-
|
235 |
-
|
236 |
-
def cluster_faces(embeddings):
    """Group face embeddings with DBSCAN (cosine metric).

    Returns one integer label per embedding. When fewer than two embeddings
    are given, or when DBSCAN marks every point as noise, all embeddings are
    assigned to cluster 0.
    """
    n_faces = len(embeddings)
    if n_faces < 2:
        print("Not enough faces for clustering. Assigning all to one cluster.")
        return np.zeros(n_faces, dtype=int)

    labels = DBSCAN(eps=0.5, min_samples=5, metric='cosine').fit_predict(np.stack(embeddings))

    if np.all(labels == -1):
        print("DBSCAN assigned all to noise. Considering as one cluster.")
        return np.zeros(n_faces, dtype=int)

    return labels
|
250 |
-
|
251 |
-
def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
    """Copy each face crop into a ``person_<cluster>`` sub-folder.

    Frame numbers (the keys of ``embeddings_by_frame``) are paired with
    ``clusters`` in order; ``frame_<n>_face.jpg`` is copied from
    ``aligned_faces_folder`` into the matching person folder under
    ``organized_faces_folder``, which is created on demand.
    """
    for frame_num, cluster in zip(embeddings_by_frame, clusters):
        face_name = f"frame_{frame_num}_face.jpg"
        person_folder = os.path.join(organized_faces_folder, f"person_{cluster}")
        os.makedirs(person_folder, exist_ok=True)
        shutil.copy(
            os.path.join(aligned_faces_folder, face_name),
            os.path.join(person_folder, face_name),
        )
|
258 |
-
|
259 |
-
def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
    """Build the per-frame table for the most frequently seen person.

    Embeddings are grouped by cluster label, the largest group is kept, its
    embeddings are saved to ``face_embeddings.npy`` in ``output_folder`` and a
    DataFrame with one row per frame is returned.

    Args:
        embeddings_by_frame: Mapping of frame number to embedding vector.
        clusters: Cluster labels, in the iteration order of the mapping.
        desired_fps: Sampling rate requested for frame extraction.
        original_fps: Frame rate of the source video.
        output_folder: Directory receiving ``face_embeddings.npy``.
        video_duration: Video length in seconds.

    Returns:
        ``(df, largest_cluster)`` where ``df`` has the columns ``Frame``,
        ``Timecode``, ``Embedding_Index`` and ``Raw_Embedding_<i>``.

    Raises:
        ValueError: If there are no embeddings.
    """
    person_data = {}
    for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
        person_data.setdefault(cluster, []).append((frame_num, embedding))

    if not person_data:
        raise ValueError("No face embeddings available to save.")

    largest_cluster = max(person_data, key=lambda k: len(person_data[k]))

    data = sorted(person_data[largest_cluster], key=lambda item: item[0])
    frames, embeddings = zip(*data)

    embeddings_array = np.array(embeddings)
    np.save(os.path.join(output_folder, 'face_embeddings.npy'), embeddings_array)

    # Frames are sampled every max(1, original_fps / desired_fps) source
    # frames, so the number of sampled frames spanning the video is
    # sampled_fps * duration. The old code used the largest face frame index
    # instead, which stretched every timecode so the last detected face landed
    # on the end of the video, and divided by zero when that index was 0.
    if original_fps and desired_fps and video_duration:
        sampled_fps = original_fps / max(1, original_fps / desired_fps)
        total_frames = sampled_fps * video_duration
    else:
        # Rates unknown: fall back to the previous scaling, avoiding zero.
        total_frames = max(frames) or 1
    timecodes = [frame_to_timecode(frame, total_frames, video_duration) for frame in frames]

    df_data = {
        'Frame': frames,
        'Timecode': timecodes,
        'Embedding_Index': range(len(embeddings))
    }

    for i in range(len(embeddings[0])):
        df_data[f'Raw_Embedding_{i}'] = [embedding[i] for embedding in embeddings]

    df = pd.DataFrame(df_data)

    return df, largest_cluster
|
291 |
-
|
292 |
-
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a 32-unit bottleneck.

    The encoder maps ``input_size -> 256 -> 128 -> 64 -> 32`` and the decoder
    mirrors it back to ``input_size``; ReLU follows every layer except the
    last one of each half.
    """

    def __init__(self, input_size):
        super().__init__()
        widths = [input_size, 256, 128, 64, 32]
        self.encoder = self._stack(widths)
        self.decoder = self._stack(widths[::-1])

    @staticmethod
    def _stack(widths):
        """Chain Linear layers over consecutive widths, ReLU between them."""
        modules = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        # No activation after the final Linear layer.
        return nn.Sequential(*modules[:-1])

    def forward(self, x):
        """Reconstruct a ``(batch, sequence, features)`` tensor item by item."""
        n_batch, n_steps, _ = x.size()
        flat = x.view(n_batch * n_steps, -1)
        restored = self.decoder(self.encoder(flat))
        return restored.view(n_batch, n_steps, -1)
|
320 |
-
|
321 |
-
def determine_anomalies(mse_values, threshold):
    """Flag values lying more than ``threshold`` standard deviations above the mean.

    Returns the element-wise result of comparing ``mse_values`` with
    ``mean + threshold * std``.
    """
    cutoff = np.mean(mse_values) + threshold * np.std(mse_values)
    return mse_values > cutoff
|
326 |
-
|
327 |
-
def anomaly_detection(X_embeddings, X_posture, epochs=200, batch_size=8, patience=5):
    """Score frames by autoencoder reconstruction error.

    Two autoencoders are trained for ``epochs`` full-batch steps, one on the
    face embeddings and one on min-max scaled posture scores. The per-frame
    mean squared reconstruction error of each is returned.

    Args:
        X_embeddings: Array of embeddings; a 2-D input gets a leading batch
            axis added.
        X_posture: NumPy array of posture scores; it is reshaped to one column.
        epochs: Number of training steps per model.
        batch_size: Accepted for interface compatibility; not used.
        patience: Accepted for interface compatibility; not used.

    Returns:
        ``(mse_embeddings, mse_posture)`` as NumPy arrays with at least one
        dimension.
    """
    # Normalize posture
    scaler_posture = MinMaxScaler()
    X_posture_scaled = scaler_posture.fit_transform(X_posture.reshape(-1, 1))

    # Process facial embeddings
    X_embeddings = torch.FloatTensor(X_embeddings).to(device)
    if X_embeddings.dim() == 2:
        X_embeddings = X_embeddings.unsqueeze(0)

    # Process posture
    X_posture_scaled = torch.FloatTensor(X_posture_scaled).to(device)
    if X_posture_scaled.dim() == 2:
        X_posture_scaled = X_posture_scaled.unsqueeze(0)

    model_embeddings = Autoencoder(input_size=X_embeddings.shape[2]).to(device)
    model_posture = Autoencoder(input_size=X_posture_scaled.shape[2]).to(device)

    criterion = nn.MSELoss()
    optimizer_embeddings = optim.Adam(model_embeddings.parameters())
    optimizer_posture = optim.Adam(model_posture.parameters())

    # Train models. The loop variable is named `net` so it does not shadow the
    # module-level face-embedding `model`.
    training_plan = [(model_embeddings, optimizer_embeddings, X_embeddings),
                     (model_posture, optimizer_posture, X_posture_scaled)]
    for _ in range(epochs):
        for net, optimizer, X in training_plan:
            net.train()
            optimizer.zero_grad()
            loss = criterion(net(X), X)
            loss.backward()
            optimizer.step()

    # Compute MSE for embeddings and posture
    model_embeddings.eval()
    model_posture.eval()
    with torch.no_grad():
        reconstructed_embeddings = model_embeddings(X_embeddings).cpu().numpy()
        reconstructed_posture = model_posture(X_posture_scaled).cpu().numpy()

    # squeeze() alone collapses a single-frame result to a 0-d array, which
    # breaks len() and slicing downstream; keep at least one dimension.
    mse_embeddings = np.atleast_1d(
        np.mean(np.power(X_embeddings.cpu().numpy() - reconstructed_embeddings, 2), axis=2).squeeze())
    mse_posture = np.atleast_1d(
        np.mean(np.power(X_posture_scaled.cpu().numpy() - reconstructed_posture, 2), axis=2).squeeze())

    return mse_embeddings, mse_posture
|
371 |
-
|
372 |
-
def plot_mse(df, mse_values, title, color='navy', time_threshold=3, anomaly_threshold=4):
    """Plot reconstruction error over time and mark anomalous frames.

    Draws the raw values, a 10-sample rolling mean with a one-standard-
    deviation band, the median, and the anomaly threshold
    (``mean + anomaly_threshold * std``). Anomalies closer together than
    ``time_threshold`` seconds are shaded as one group, and the highest value
    of each group is annotated with its timecode.

    Args:
        df: DataFrame with ``Frame`` and ``Timecode`` columns; a ``Seconds``
            column is added to it in place when missing.
        mse_values: NumPy array of error values aligned with ``df`` rows.
        title: Plot title.
        color: Colour of the data series.
        time_threshold: Maximum gap in seconds inside one anomaly group.
        anomaly_threshold: Number of standard deviations above the mean.

    Returns:
        ``(fig, anomaly_frames)``: the closed figure and the list of frame
        numbers flagged as anomalies.
    """
    # Only one figure is created: the former extra plt.figure() call left an
    # unused figure open on every invocation.
    fig, ax = plt.subplots(figsize=(16, 8))

    if 'Seconds' not in df.columns:
        df['Seconds'] = df['Timecode'].apply(
            lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))

    # Ensure df and mse_values have the same length
    min_length = min(len(df), len(mse_values))
    df = df.iloc[:min_length]
    mse_values = mse_values[:min_length]

    # Remove NaN values
    mask = ~np.isnan(mse_values)
    df = df[mask]
    mse_values = mse_values[mask]

    mean = pd.Series(mse_values).rolling(window=10).mean()
    std = pd.Series(mse_values).rolling(window=10).std()
    median = np.median(mse_values)

    ax.scatter(df['Seconds'], mse_values, color=color, alpha=0.3, s=5)
    ax.plot(df['Seconds'], mean, color=color, linewidth=0.5)
    ax.fill_between(df['Seconds'], mean - std, mean + std, color=color, alpha=0.1)

    # Median line
    ax.axhline(y=median, color='black', linestyle='--', label='Median Baseline')

    # Threshold line
    threshold = np.mean(mse_values) + anomaly_threshold * np.std(mse_values)
    ax.axhline(y=threshold, color='red', linestyle='--', label=f'Threshold: {anomaly_threshold:.1f}')
    ax.text(ax.get_xlim()[1], threshold, f'Threshold: {anomaly_threshold:.1f}', verticalalignment='center', horizontalalignment='left', color='red')

    anomalies = determine_anomalies(mse_values, anomaly_threshold)
    anomaly_frames = df['Frame'].iloc[anomalies].tolist()

    ax.scatter(df['Seconds'].iloc[anomalies], mse_values[anomalies], color='red', s=20, zorder=5)

    anomaly_data = list(zip(df['Timecode'].iloc[anomalies],
                            df['Seconds'].iloc[anomalies],
                            mse_values[anomalies]))
    anomaly_data.sort(key=lambda x: x[1])

    # Merge anomalies that are at most time_threshold seconds apart.
    grouped_anomalies = []
    current_group = []
    for timecode, sec, mse in anomaly_data:
        if not current_group or sec - current_group[-1][1] <= time_threshold:
            current_group.append((timecode, sec, mse))
        else:
            grouped_anomalies.append(current_group)
            current_group = [(timecode, sec, mse)]
    if current_group:
        grouped_anomalies.append(current_group)

    # Shade the time span of every group.
    for group in grouped_anomalies:
        start_sec = group[0][1]
        end_sec = group[-1][1]
        rect = Rectangle((start_sec, ax.get_ylim()[0]), end_sec - start_sec, ax.get_ylim()[1] - ax.get_ylim()[0],
                         facecolor='red', alpha=0.2, zorder=1)
        ax.add_patch(rect)

    # Label the peak of every group with its timecode.
    for group in grouped_anomalies:
        timecode, sec, mse = max(group, key=lambda x: x[2])
        ax.annotate(timecode, (sec, mse), textcoords="offset points", xytext=(0, 10),
                    ha='center', fontsize=6, color='red')

    max_seconds = df['Seconds'].max()
    num_ticks = 100
    tick_locations = np.linspace(0, max_seconds, num_ticks)
    tick_labels = [seconds_to_timecode(int(s)) for s in tick_locations]

    ax.set_xticks(tick_locations)
    ax.set_xticklabels(tick_labels, rotation=90, ha='center', fontsize=6)

    ax.set_xlabel('Timecode')
    ax.set_ylabel('Mean Squared Error')
    ax.set_title(title)

    ax.grid(True, linestyle='--', alpha=0.7)
    ax.legend()
    fig.tight_layout()
    plt.close(fig)
    return fig, anomaly_frames
|
457 |
-
|
458 |
-
def plot_mse_histogram(mse_values, title, anomaly_threshold, color='blue'):
    """Plot a 100-bin histogram of error values with the anomaly threshold.

    Args:
        mse_values: Error values to bin.
        title: Plot title.
        anomaly_threshold: Number of standard deviations above the mean at
            which the vertical threshold line is drawn.
        color: Bar colour.

    Returns:
        The closed figure.
    """
    # Only one figure is created: the former extra plt.figure() call left an
    # unused figure open on every invocation.
    fig, ax = plt.subplots(figsize=(16, 4))

    ax.hist(mse_values, bins=100, edgecolor='black', color=color, alpha=0.7)
    ax.set_xlabel('Mean Squared Error')
    ax.set_ylabel('Number of Samples')
    ax.set_title(title)

    threshold = np.mean(mse_values) + anomaly_threshold * np.std(mse_values)

    ax.axvline(x=threshold, color='red', linestyle='--', linewidth=2)

    # Put the label below the axis, clear of the threshold line.
    ax.annotate(f'Threshold: {anomaly_threshold:.1f}',
                xy=(threshold, ax.get_ylim()[0]),
                xytext=(0, -20),
                textcoords='offset points',
                ha='center', va='top',
                bbox=dict(boxstyle='round,pad=0.5', fc='white', ec='none', alpha=0.7),
                color='red')

    fig.tight_layout()
    plt.close(fig)
    return fig
|
485 |
-
|
486 |
-
|
487 |
-
def plot_posture(df, posture_scores, color='blue', anomaly_threshold=4):
    """Plot posture scores over time with a 10-sample rolling mean.

    Args:
        df: DataFrame with ``Frame`` and ``Timecode`` columns; its ``Seconds``
            column is (re)computed in place from ``Timecode``.
        posture_scores: Mapping of frame number to score; ``None`` scores are
            skipped.
        color: Colour of the data series.
        anomaly_threshold: Accepted for interface compatibility; not used.

    Returns:
        The closed figure. When no usable score exists the axes are left
        empty.
    """
    # Only one figure is created: the former extra plt.figure() call left an
    # unused figure open on every invocation.
    fig, ax = plt.subplots(figsize=(16, 8))

    df['Seconds'] = df['Timecode'].apply(
        lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))

    posture_data = [(frame, score) for frame, score in posture_scores.items() if score is not None]

    # zip(*[]) cannot be unpacked into two names, so skip plotting when every
    # score is missing instead of raising ValueError.
    if posture_data:
        posture_frames, scores = zip(*posture_data)

        # Attach timestamps to the scores through the frame number.
        posture_df = pd.DataFrame({'Frame': posture_frames, 'Score': scores})
        posture_df = posture_df.merge(df[['Frame', 'Seconds']], on='Frame', how='inner')

        ax.scatter(posture_df['Seconds'], posture_df['Score'], color=color, alpha=0.3, s=5)
        mean = posture_df['Score'].rolling(window=10).mean()
        ax.plot(posture_df['Seconds'], mean, color=color, linewidth=0.5)

    ax.set_xlabel('Timecode')
    ax.set_ylabel('Posture Score')
    ax.set_title("Body Posture Over Time")

    ax.grid(True, linestyle='--', alpha=0.7)

    max_seconds = df['Seconds'].max()
    num_ticks = 80
    tick_locations = np.linspace(0, max_seconds, num_ticks)
    tick_labels = [seconds_to_timecode(int(s)) for s in tick_locations]

    ax.set_xticks(tick_locations)
    ax.set_xticklabels(tick_labels, rotation=90, ha='center', fontsize=6)

    fig.tight_layout()
    plt.close(fig)
    return fig
|
522 |
-
|
523 |
-
|
524 |
-
def plot_mse_heatmap(mse_values, title, df):
    """Draw error values as a single-row heatmap labelled with timecodes.

    Args:
        mse_values: 1-D NumPy array of error values.
        title: Plot title.
        df: DataFrame whose ``Timecode`` column supplies the tick labels by
            row position.

    Returns:
        The closed figure.
    """
    # Only one figure is created: the former extra plt.figure() call left an
    # unused figure open on every invocation.
    fig, ax = plt.subplots(figsize=(20, 5))

    # Reshape MSE values to a 2D array for the heatmap
    mse_2d = mse_values.reshape(1, -1)

    sns.heatmap(mse_2d, cmap='YlOrRd', cbar_kws={'label': 'MSE'}, ax=ax)

    # At most 60 timecode ticks, and never more ticks than samples (which
    # would only repeat the same positions).
    num_ticks = min(60, len(mse_values))
    tick_locations = np.linspace(0, len(mse_values) - 1, num_ticks).astype(int)
    tick_labels = [df['Timecode'].iloc[i] for i in tick_locations]

    ax.set_xticks(tick_locations)
    ax.set_xticklabels(tick_labels, rotation=90, ha='center', va='top')

    ax.set_title(title)

    # Remove y-axis labels
    ax.set_yticks([])

    fig.tight_layout()
    plt.close(fig)
    return fig
|
550 |
-
|
551 |
-
def draw_pose_landmarks(frame, landmarks):
    """Return a copy of ``frame`` with a body skeleton drawn on it.

    Draws a dot on each selected pose landmark, lines for the body
    connections, a green line between the ears, and a green line from the
    nose to the midpoint between the shoulders.

    Args:
        frame: Image to annotate; it is not modified.
        landmarks: Pose landmark list whose ``landmark`` items expose
            normalised ``x`` and ``y``.

    Returns:
        The annotated copy of the frame.
    """
    annotated_frame = frame.copy()
    h, w, _ = annotated_frame.shape
    P = mp_pose.PoseLandmark

    def to_pixel(lm):
        # Landmark coordinates are fractions of the image width and height.
        return int(lm.x * w), int(lm.y * h)

    # Landmarks for head position and body
    body_landmarks = [
        P.NOSE,
        P.LEFT_SHOULDER, P.RIGHT_SHOULDER,
        P.LEFT_EAR, P.RIGHT_EAR,
        P.LEFT_ELBOW, P.RIGHT_ELBOW,
        P.LEFT_WRIST, P.RIGHT_WRIST,
        P.LEFT_HIP, P.RIGHT_HIP,
        P.LEFT_KNEE, P.RIGHT_KNEE,
        P.LEFT_ANKLE, P.RIGHT_ANKLE,
    ]

    # Connections for head position and body
    body_connections = [
        (P.LEFT_EAR, P.LEFT_SHOULDER),
        (P.RIGHT_EAR, P.RIGHT_SHOULDER),
        (P.NOSE, P.LEFT_SHOULDER),
        (P.NOSE, P.RIGHT_SHOULDER),
        (P.LEFT_SHOULDER, P.RIGHT_SHOULDER),
        (P.LEFT_SHOULDER, P.LEFT_ELBOW),
        (P.RIGHT_SHOULDER, P.RIGHT_ELBOW),
        (P.LEFT_ELBOW, P.LEFT_WRIST),
        (P.RIGHT_ELBOW, P.RIGHT_WRIST),
        (P.LEFT_SHOULDER, P.LEFT_HIP),
        (P.RIGHT_SHOULDER, P.RIGHT_HIP),
        (P.LEFT_HIP, P.RIGHT_HIP),
        (P.LEFT_HIP, P.LEFT_KNEE),
        (P.RIGHT_HIP, P.RIGHT_KNEE),
        (P.LEFT_KNEE, P.LEFT_ANKLE),
        (P.RIGHT_KNEE, P.RIGHT_ANKLE),
    ]

    # Draw landmarks. The old guard `landmark in landmarks.landmark` compared
    # an enum member with landmark objects, so it never matched and no dot was
    # drawn; the landmarks are indexed directly, as the connections already are.
    for landmark in body_landmarks:
        cv2.circle(annotated_frame, to_pixel(landmarks.landmark[landmark]), 5, (245, 117, 66), -1)

    # Draw connections
    for start, end in body_connections:
        cv2.line(annotated_frame, to_pixel(landmarks.landmark[start]),
                 to_pixel(landmarks.landmark[end]), (245, 66, 230), 2)

    # Line between the ears to show head tilt
    left_ear_point = to_pixel(landmarks.landmark[P.LEFT_EAR])
    right_ear_point = to_pixel(landmarks.landmark[P.RIGHT_EAR])
    cv2.line(annotated_frame, left_ear_point, right_ear_point, (0, 255, 0), 2)

    # Line from the nose to the midpoint between the shoulders
    nose_point = to_pixel(landmarks.landmark[P.NOSE])
    left_shoulder = landmarks.landmark[P.LEFT_SHOULDER]
    right_shoulder = landmarks.landmark[P.RIGHT_SHOULDER]
    shoulder_mid_x = (left_shoulder.x + right_shoulder.x) / 2
    shoulder_mid_y = (left_shoulder.y + right_shoulder.y) / 2
    shoulder_mid_point = (int(shoulder_mid_x * w), int(shoulder_mid_y * h))
    cv2.line(annotated_frame, nose_point, shoulder_mid_point, (0, 255, 0), 2)

    return annotated_frame
|
631 |
-
|
632 |
-
def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=500):
    """Export 160x160 face samples, split into the main person and everyone else.

    Every ``person_<id>`` folder under ``organized_faces_folder`` is scanned
    in sorted order. Faces of ``largest_cluster`` are written to
    ``output_folder`` as ``face_sample_most_frequent_<i>.jpg``; faces of other
    clusters as ``face_sample_other_<id>_<i>.jpg``. Each of the two lists
    holds at most ``max_samples`` paths.

    Returns:
        Dict with the keys ``"most_frequent"`` and ``"others"``, each a list
        of written file paths.
    """
    face_samples = {"most_frequent": [], "others": []}

    def export(person_dir, names, bucket, prefix):
        # Resize each readable image, write it, and record the new path.
        collected = face_samples[bucket]
        for idx, name in enumerate(names):
            target = os.path.join(output_folder, f"{prefix}_{idx:04d}.jpg")
            image = cv2.imread(os.path.join(person_dir, name))
            if image is not None:
                cv2.imwrite(target, cv2.resize(image, (160, 160)))
                collected.append(target)
            if len(collected) >= max_samples:
                break

    for cluster_folder in sorted(os.listdir(organized_faces_folder)):
        if not cluster_folder.startswith("person_"):
            continue
        person_folder = os.path.join(organized_faces_folder, cluster_folder)
        face_files = sorted([f for f in os.listdir(person_folder) if f.endswith('.jpg')])
        if not face_files:
            continue

        cluster_id = int(cluster_folder.split('_')[1])
        if cluster_id == largest_cluster:
            export(person_folder, face_files[:max_samples], "most_frequent",
                   "face_sample_most_frequent")
            continue

        # Other people share a single budget of max_samples.
        remaining_samples = max_samples - len(face_samples["others"])
        if remaining_samples > 0:
            export(person_folder, face_files[:remaining_samples], "others",
                   f"face_sample_other_{cluster_id:02d}")

    return face_samples
|
665 |
-
|
666 |
-
|
667 |
-
def process_video(video_path, anomaly_threshold, desired_fps, progress=gr.Progress()):
    """Run the face/posture anomaly pipeline on a single video.

    The stages are: frame extraction, per-frame face/posture processing,
    face clustering, per-person organisation, CSV export, anomaly detection
    on the embeddings and posture scores, plotting, and collection of the
    images for frames flagged as anomalous.

    Args:
        video_path: Path of the video file to analyse.
        anomaly_threshold: Threshold forwarded to ``plot_mse`` and
            ``plot_mse_histogram``.
        desired_fps: Sampling rate forwarded to ``extract_frames`` and
            ``save_person_data_to_csv``.
        progress: Callable invoked as ``progress(fraction, description)``.

    Returns:
        On success, a 17-tuple:
        ``(execution_time, results, df, mse_embeddings, mse_posture,
        mse_plot_embeddings, mse_histogram_embeddings, mse_plot_posture,
        mse_histogram_posture, mse_heatmap_embeddings, mse_heatmap_posture,
        most_frequent_face_samples, other_face_samples,
        anomaly_faces_embeddings, anomaly_frames_posture_images,
        aligned_faces_folder, frames_folder)``.
        If anomaly detection or plotting fails, a tuple of the same length
        whose first item is a string starting with
        ``"Error in video processing: "`` and whose other items are ``None``.

    Raises:
        ValueError: If no aligned faces were extracted from the video.
    """
    start_time = time.time()
    output_folder = "output"
    os.makedirs(output_folder, exist_ok=True)
    batch_size = 16

    # Number of items in the success tuple; the error tuple mirrors it.
    result_arity = 17
    # Share of the progress bar reserved for frame extraction.
    extraction_share = 0.3

    GRAPH_COLORS = {
        'facial_embeddings': 'navy',
        'body_posture': 'purple'
    }

    with tempfile.TemporaryDirectory() as temp_dir:
        aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
        organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
        os.makedirs(aligned_faces_folder, exist_ok=True)
        os.makedirs(organized_faces_folder, exist_ok=True)

        # Only the duration is needed; always release the reader afterwards.
        clip = VideoFileClip(video_path)
        try:
            video_duration = clip.duration
        finally:
            clip.close()

        progress(0, "Starting frame extraction")
        frames_folder = os.path.join(temp_dir, 'extracted_frames')

        def extraction_progress(percent, message):
            # Scale extraction into the first part of the bar so the overall
            # progress never moves backwards when the next stage starts.
            progress(extraction_share * percent / 100, "Extracting frames")

        frame_count, original_fps = extract_frames(video_path, frames_folder, desired_fps, extraction_progress)

        progress(extraction_share, "Frame extraction complete")
        progress(0.3, "Processing frames")
        embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths = process_frames(
            frames_folder, aligned_faces_folder,
            frame_count,
            progress, batch_size)

        if not aligned_face_paths:
            raise ValueError("No faces were extracted from the video.")

        progress(0.6, "Clustering faces")
        embeddings = list(embeddings_by_frame.values())
        clusters = cluster_faces(embeddings)
        num_clusters = len(set(clusters))

        progress(0.7, "Organizing faces")
        organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)

        progress(0.8, "Saving person data")
        df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps,
                                                      original_fps, temp_dir, video_duration)

        # Add 'Seconds' column to df
        df['Seconds'] = df['Timecode'].apply(_timecode_to_seconds)

        progress(0.85, "Getting face samples")
        face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)

        progress(0.9, "Performing anomaly detection")
        embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]

        X_embeddings = df[embedding_columns].values

        try:
            # Frames without a posture score are dropped.
            # NOTE(review): after dropping, X_posture may be shorter than df;
            # confirm anomaly_detection/plot_mse tolerate that.
            posture_scores = [posture_scores_by_frame.get(frame) for frame in df['Frame']]
            valid_scores = [score for score in posture_scores if score is not None]

            # Ensure X_posture is not empty
            if not valid_scores:
                raise ValueError("No valid posture data found")

            X_posture = np.asarray(valid_scores, dtype=float).reshape(-1, 1)

            mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture, batch_size=batch_size)

            progress(0.95, "Generating plots")
            mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
                                                                      color=GRAPH_COLORS['facial_embeddings'],
                                                                      anomaly_threshold=anomaly_threshold)

            mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
                                                          anomaly_threshold, color=GRAPH_COLORS['facial_embeddings'])

            mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
                                                                color=GRAPH_COLORS['body_posture'],
                                                                anomaly_threshold=anomaly_threshold)

            mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
                                                       anomaly_threshold, color=GRAPH_COLORS['body_posture'])

            mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
            mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)

        except Exception as e:
            # Boundary handler: report the failure to the caller as data so
            # the UI can display it, and keep the traceback in the logs.
            print(f"Error details: {str(e)}")
            import traceback
            traceback.print_exc()
            return (f"Error in video processing: {str(e)}",) + (None,) * (result_arity - 1)

        progress(1.0, "Preparing results")
        summary_parts = [f"Number of persons detected: {num_clusters}\n\n", "Breakdown:\n"]
        for cluster_id in range(num_clusters):
            face_count = sum(1 for label in clusters if label == cluster_id)
            summary_parts.append(f"Person {cluster_id + 1}: {face_count} face frames\n")
        results = "".join(summary_parts)

        execution_time = time.time() - start_time

        # Load the overlay font once instead of once per image.
        font = _load_timecode_font()

        def timecode_of(frame):
            return df[df['Frame'] == frame]['Timecode'].iloc[0]

        anomaly_faces_embeddings = []
        for frame in anomaly_frames_embeddings:
            face_path = os.path.join(aligned_faces_folder, f"frame_{frame}_face.jpg")
            if not os.path.exists(face_path):
                continue
            face_img = cv2.imread(face_path)
            if face_img is None:
                continue
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            anomaly_faces_embeddings.append(
                _add_timecode_to_image(face_img, timecode_of(frame), font))

        anomaly_frames_posture_images = []
        for frame in anomaly_frames_posture:
            frame_path = os.path.join(frames_folder, f"frame_{frame:04d}.jpg")
            if not os.path.exists(frame_path):
                continue
            frame_img = cv2.imread(frame_path)
            if frame_img is None:
                continue
            frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
            pose_results = pose.process(frame_img)
            if pose_results.pose_landmarks:
                frame_img = draw_pose_landmarks(frame_img, pose_results.pose_landmarks)
            anomaly_frames_posture_images.append(
                _add_timecode_to_image(frame_img, timecode_of(frame), font))

        # NOTE(review): aligned_faces_folder and frames_folder live inside
        # temp_dir, which is removed when this `with` block exits, so the
        # returned paths no longer exist once the caller receives them.
        return (
            execution_time,
            results,
            df,
            mse_embeddings,
            mse_posture,
            mse_plot_embeddings,
            mse_histogram_embeddings,
            mse_plot_posture,
            mse_histogram_posture,
            mse_heatmap_embeddings,
            mse_heatmap_posture,
            face_samples["most_frequent"],
            face_samples["others"],
            anomaly_faces_embeddings,
            anomaly_frames_posture_images,
            aligned_faces_folder,
            frames_folder
        )


def _timecode_to_seconds(timecode):
    """Convert a colon-separated timecode (e.g. ``HH:MM:SS``) to seconds."""
    return sum(float(part) * 60 ** i for i, part in enumerate(reversed(timecode.split(':'))))


def _load_timecode_font(size=15):
    """Return Arial at ``size`` if it can be loaded, else Pillow's default font."""
    try:
        return ImageFont.truetype("arial.ttf", size)
    except OSError:
        # Arial is not available on every host; fall back instead of failing
        # the whole run after all the analysis has already been done.
        return ImageFont.load_default()


def _add_timecode_to_image(image, timecode, font):
    """Return a copy of the RGB array ``image`` with ``timecode`` drawn in red."""
    img_pil = Image.fromarray(image)
    draw = ImageDraw.Draw(img_pil)
    draw.text((10, 10), timecode, (255, 0, 0), font=font)
    return np.array(img_pil)
|
826 |
-
|
827 |
-
|
828 |
-
# Gradio UI: one video input and threshold slider, a hidden results group
# (plots and galleries per tab), and State components holding intermediate
# values. The "Process Video" button runs the pipeline and then reveals the
# results group.
with gr.Blocks() as iface:
    gr.Markdown("""
    # Facial Expression and Body Language Anomaly Detection

    This application analyzes videos to detect anomalies in facial features and body language.
    It processes the video frames to extract facial embeddings and body posture,
    then uses machine learning techniques to identify unusual patterns or deviations from the norm.

    For more information, visit: [https://github.com/reab5555/Facial-Expression-Anomaly-Detection](https://github.com/reab5555/Facial-Expression-Anomaly-Detection)
    """)

    with gr.Row():
        video_input = gr.Video()

    anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3, label="Anomaly Detection Threshold")
    process_btn = gr.Button("Process Video")
    # Kept so the module-level name still exists; the handler below declares
    # its own gr.Progress() default, which is what Gradio actually tracks.
    progress_bar = gr.Progress()
    execution_time = gr.Number(label="Execution Time (seconds)")

    with gr.Group(visible=False) as results_group:
        results_text = gr.TextArea(label="Anomaly Detection Results", lines=4)

        with gr.Tab("Facial Features"):
            mse_features_plot = gr.Plot(label="MSE: Facial Features")
            mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
            mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
            anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")

        with gr.Tab("Body Posture"):
            mse_posture_plot = gr.Plot(label="MSE: Body Posture")
            mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
            mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
            anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")

        with gr.Tab("Face Samples"):
            face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples (Target)", columns=6, rows=2, height="auto")
            face_samples_others = gr.Gallery(label="Other Persons Samples", columns=6, rows=1, height="auto")

    # Hidden components to store intermediate results
    df_store = gr.State()
    mse_features_store = gr.State()
    mse_posture_store = gr.State()
    aligned_faces_folder_store = gr.State()
    frames_folder_store = gr.State()
    mse_heatmap_embeddings_store = gr.State()
    mse_heatmap_posture_store = gr.State()

    def process_and_show_completion(video_input_path, anomaly_threshold_input, progress=gr.Progress()):
        """Run ``process_video`` and map its result onto the 19 click outputs.

        ``progress`` must be a default-valued parameter of this handler for
        Gradio to inject a live tracker; it is forwarded to ``process_video``.
        On failure the message is placed in the ``results_text`` slot (second
        output) and every other output is ``None``.
        """
        output_count = 19  # must match the `outputs` list of process_btn.click

        def error_outputs(message):
            # The first output is a gr.Number and cannot hold text, so the
            # message goes to the text area in the second slot.
            return [None, message] + [None] * (output_count - 2)

        def load_image(path):
            # Copy the pixels out so the file handle is closed immediately.
            with Image.open(path) as img:
                return img.copy()

        try:
            print("Starting video processing...")
            results = process_video(video_input_path, anomaly_threshold_input, FIXED_FPS, progress=progress)
            print("Video processing completed.")

            if isinstance(results[0], str) and results[0].startswith("Error"):
                print(f"Error occurred: {results[0]}")
                return error_outputs(results[0])

            (exec_time, results_summary, df, mse_embeddings, mse_posture,
             mse_plot_embeddings, mse_histogram_embeddings,
             mse_plot_posture, mse_histogram_posture,
             mse_heatmap_embeddings, mse_heatmap_posture,
             face_samples_frequent, face_samples_other,
             anomaly_faces_embeddings, anomaly_frames_posture_images,
             aligned_faces_folder, frames_folder) = results

            # Convert numpy arrays to PIL Images for the galleries
            anomaly_faces_embeddings_pil = [Image.fromarray(face) for face in anomaly_faces_embeddings]
            anomaly_frames_posture_pil = [Image.fromarray(frame) for frame in anomaly_frames_posture_images]

            # Ensure face samples are in the correct format for Gradio
            face_samples_frequent = [load_image(path) for path in face_samples_frequent]
            face_samples_other = [load_image(path) for path in face_samples_other]

            # Order must match the `outputs` list of process_btn.click.
            # NOTE(review): the last two slots feed the *heatmap* stores with
            # the raw MSE arrays; confirm whether the heatmaps were intended.
            return [
                exec_time, results_summary,
                df, mse_embeddings, mse_posture,
                mse_plot_embeddings, mse_plot_posture,
                mse_histogram_embeddings, mse_histogram_posture,
                mse_heatmap_embeddings, mse_heatmap_posture,
                anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
                face_samples_frequent, face_samples_other,
                aligned_faces_folder, frames_folder,
                mse_embeddings, mse_posture
            ]

        except Exception as e:
            # UI boundary: log the traceback and show the message to the user.
            error_message = f"An error occurred: {str(e)}"
            print(error_message)
            import traceback
            traceback.print_exc()
            return error_outputs(error_message)

    process_btn.click(
        process_and_show_completion,
        inputs=[video_input, anomaly_threshold],
        outputs=[
            execution_time, results_text, df_store,
            mse_features_store, mse_posture_store,
            mse_features_plot, mse_posture_plot,
            mse_features_hist, mse_posture_hist,
            mse_features_heatmap, mse_posture_heatmap,
            anomaly_frames_features, anomaly_frames_posture,
            face_samples_most_frequent, face_samples_others,
            aligned_faces_folder_store, frames_folder_store,
            mse_heatmap_embeddings_store, mse_heatmap_posture_store
        ]
    ).then(
        # Reveal the results group once the handler has finished.
        lambda: gr.Group(visible=True),
        inputs=None,
        outputs=[results_group]
    )

if __name__ == "__main__":
    iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|