reab5555 committed on
Commit
cdca32f
·
verified ·
1 Parent(s): aeb4947

Update video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +36 -74
video_processing.py CHANGED
@@ -8,6 +8,7 @@ from PIL import Image, ImageDraw, ImageFont
8
  import math
9
  from face_analysis import get_face_embedding, cluster_faces, organize_faces_by_person, draw_facial_landmarks
10
  from pose_analysis import pose, calculate_posture_score, draw_pose_landmarks
 
11
  from anomaly_detection import anomaly_detection
12
  from visualization import plot_mse, plot_mse_histogram, plot_mse_heatmap, create_video_with_heatmap
13
  from utils import frame_to_timecode
@@ -15,6 +16,7 @@ import pandas as pd
15
  from facenet_pytorch import MTCNN
16
  import torch
17
  import mediapipe as mp
 
18
 
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
  mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
@@ -22,71 +24,6 @@ mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_fac
22
  mp_face_mesh = mp.solutions.face_mesh
23
  face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
24
 
25
- def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
26
- os.makedirs(output_folder, exist_ok=True)
27
- clip = VideoFileClip(video_path)
28
- original_fps = clip.fps
29
- duration = clip.duration
30
- total_frames = int(duration * original_fps)
31
- step = max(1, original_fps / desired_fps)
32
- total_frames_to_extract = int(total_frames / step)
33
-
34
- frame_count = 0
35
- for t in np.arange(0, duration, step / original_fps):
36
- frame = clip.get_frame(t)
37
- cv2.imwrite(os.path.join(output_folder, f"frame_{frame_count:04d}.jpg"), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
38
- frame_count += 1
39
- if progress_callback:
40
- progress = min(100, (frame_count / total_frames_to_extract) * 100)
41
- progress_callback(progress, f"Extracting frame")
42
- if frame_count >= total_frames_to_extract:
43
- break
44
- clip.close()
45
- return frame_count, original_fps
46
-
47
- def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
48
- embeddings_by_frame = {}
49
- posture_scores_by_frame = {}
50
- posture_landmarks_by_frame = {}
51
- facial_landmarks_by_frame = {}
52
- aligned_face_paths = []
53
- frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
54
-
55
- for i, frame_file in enumerate(frame_files):
56
- frame_num = int(frame_file.split('_')[1].split('.')[0])
57
- frame_path = os.path.join(frames_folder, frame_file)
58
- frame = cv2.imread(frame_path)
59
-
60
- if frame is not None:
61
- posture_score, posture_landmarks = calculate_posture_score(frame)
62
- posture_scores_by_frame[frame_num] = posture_score
63
- posture_landmarks_by_frame[frame_num] = posture_landmarks
64
-
65
- boxes, probs = mtcnn.detect(frame)
66
-
67
- if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
68
- x1, y1, x2, y2 = [int(b) for b in boxes[0]]
69
- face = frame[y1:y2, x1:x2]
70
- if face.size > 0:
71
- face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
72
- results = face_mesh.process(face_rgb)
73
- if results.multi_face_landmarks:
74
- facial_landmarks_by_frame[frame_num] = results.multi_face_landmarks[0]
75
- if is_frontal_face(results.multi_face_landmarks[0].landmark):
76
- aligned_face = face
77
-
78
- if aligned_face is not None:
79
- aligned_face_resized = cv2.resize(aligned_face, (160, 160))
80
- output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
81
- cv2.imwrite(output_path, aligned_face_resized)
82
- aligned_face_paths.append(output_path)
83
- embedding = get_face_embedding(aligned_face_resized)
84
- embeddings_by_frame[frame_num] = embedding
85
-
86
- progress((i + 1) / len(frame_files), f"Processing frame {i + 1} of {len(frame_files)}")
87
-
88
- return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths, facial_landmarks_by_frame
89
-
90
  def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
91
  start_time = time.time()
92
  output_folder = "output"
@@ -94,7 +31,8 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
94
 
95
  GRAPH_COLORS = {
96
  'facial_embeddings': 'navy',
97
- 'body_posture': 'purple'
 
98
  }
99
 
100
  with tempfile.TemporaryDirectory() as temp_dir:
@@ -147,7 +85,13 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
147
  progress(0.75, "Getting face samples")
148
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
149
 
150
- progress(0.8, "Performing anomaly detection")
 
 
 
 
 
 
151
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
152
 
153
  X_embeddings = df[embedding_columns].values
@@ -159,9 +103,11 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
159
  if len(X_posture) == 0:
160
  raise ValueError("No valid posture data found")
161
 
162
- mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
 
 
163
 
164
- progress(0.85, "Generating graphs")
165
  mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
166
  color=GRAPH_COLORS['facial_embeddings'],
167
  anomaly_threshold=anomaly_threshold)
@@ -176,21 +122,28 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
176
  mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
177
  anomaly_threshold, color=GRAPH_COLORS['body_posture'])
178
 
179
- mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
 
 
 
 
 
180
 
181
  mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
 
 
182
 
183
- progress(0.9, "Generating video with heatmap")
184
 
185
  # Create video with heatmap
186
  heatmap_video_path = os.path.join(output_folder, "heatmap_video.mp4")
187
- heatmap_video_path = create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, heatmap_video_path, original_fps, largest_cluster)
188
 
189
  except Exception as e:
190
  print(f"Error details: {str(e)}")
191
  import traceback
192
  traceback.print_exc()
193
- return (f"Error in video processing: {str(e)}",) + (None,) * 16
194
 
195
  progress(1.0, "Preparing results")
196
  results = f"Number of persons detected: {num_clusters}\n\n"
@@ -242,12 +195,16 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
242
  df,
243
  mse_embeddings,
244
  mse_posture,
 
245
  mse_plot_embeddings,
246
- mse_histogram_embeddings,
247
  mse_plot_posture,
 
 
248
  mse_histogram_posture,
 
249
  mse_heatmap_embeddings,
250
  mse_heatmap_posture,
 
251
  face_samples["most_frequent"],
252
  anomaly_faces_embeddings,
253
  anomaly_frames_posture_images,
@@ -256,6 +213,11 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
256
  heatmap_video_path
257
  )
258
 
 
 
 
 
 
259
  def is_frontal_face(landmarks, threshold=60):
260
  nose_tip = landmarks[4]
261
  left_chin = landmarks[234]
 
8
  import math
9
  from face_analysis import get_face_embedding, cluster_faces, organize_faces_by_person, draw_facial_landmarks
10
  from pose_analysis import pose, calculate_posture_score, draw_pose_landmarks
11
+ from voice_analysis import extract_audio_from_video, diarize_speakers, get_speaker_embeddings
12
  from anomaly_detection import anomaly_detection
13
  from visualization import plot_mse, plot_mse_histogram, plot_mse_heatmap, create_video_with_heatmap
14
  from utils import frame_to_timecode
 
16
  from facenet_pytorch import MTCNN
17
  import torch
18
  import mediapipe as mp
19
+ from pyannote.audio import Model
20
 
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
  mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
 
24
  mp_face_mesh = mp.solutions.face_mesh
25
  face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
28
  start_time = time.time()
29
  output_folder = "output"
 
31
 
32
  GRAPH_COLORS = {
33
  'facial_embeddings': 'navy',
34
+ 'body_posture': 'purple',
35
+ 'voice': 'green'
36
  }
37
 
38
  with tempfile.TemporaryDirectory() as temp_dir:
 
85
  progress(0.75, "Getting face samples")
86
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
87
 
88
+ progress(0.8, "Extracting audio and performing voice analysis")
89
+ audio_path = extract_audio_from_video(video_path)
90
+ diarization = diarize_speakers(audio_path)
91
+ voice_model = Model.from_pretrained("pyannote/embedding")
92
+ voice_embeddings = get_speaker_embeddings(audio_path, diarization, voice_model)
93
+
94
+ progress(0.85, "Performing anomaly detection")
95
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
96
 
97
  X_embeddings = df[embedding_columns].values
 
103
  if len(X_posture) == 0:
104
  raise ValueError("No valid posture data found")
105
 
106
+ X_voice = np.array([emb['embedding'] for emb in voice_embeddings])
107
+
108
+ mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
109
 
110
+ progress(0.9, "Generating graphs")
111
  mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
112
  color=GRAPH_COLORS['facial_embeddings'],
113
  anomaly_threshold=anomaly_threshold)
 
122
  mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
123
  anomaly_threshold, color=GRAPH_COLORS['body_posture'])
124
 
125
+ mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
126
+ color=GRAPH_COLORS['voice'],
127
+ anomaly_threshold=anomaly_threshold)
128
+
129
+ mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
130
+ anomaly_threshold, color=GRAPH_COLORS['voice'])
131
 
132
  mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
133
+ mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
134
+ mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
135
 
136
+ progress(0.95, "Generating video with heatmap")
137
 
138
  # Create video with heatmap
139
  heatmap_video_path = os.path.join(output_folder, "heatmap_video.mp4")
140
+ heatmap_video_path = create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_voice, heatmap_video_path, original_fps, largest_cluster)
141
 
142
  except Exception as e:
143
  print(f"Error details: {str(e)}")
144
  import traceback
145
  traceback.print_exc()
146
+ return (f"Error in video processing: {str(e)}",) + (None,) * 21
147
 
148
  progress(1.0, "Preparing results")
149
  results = f"Number of persons detected: {num_clusters}\n\n"
 
195
  df,
196
  mse_embeddings,
197
  mse_posture,
198
+ mse_voice,
199
  mse_plot_embeddings,
 
200
  mse_plot_posture,
201
+ mse_plot_voice,
202
+ mse_histogram_embeddings,
203
  mse_histogram_posture,
204
+ mse_histogram_voice,
205
  mse_heatmap_embeddings,
206
  mse_heatmap_posture,
207
+ mse_heatmap_voice,
208
  face_samples["most_frequent"],
209
  anomaly_faces_embeddings,
210
  anomaly_frames_posture_images,
 
213
  heatmap_video_path
214
  )
215
 
216
+
217
+
218
+
219
+
220
+
221
  def is_frontal_face(landmarks, threshold=60):
222
  nose_tip = landmarks[4]
223
  left_chin = landmarks[234]