Update video_processing.py
Browse files- video_processing.py +36 -43
video_processing.py
CHANGED
@@ -15,8 +15,6 @@ import pandas as pd
|
|
15 |
from facenet_pytorch import MTCNN
|
16 |
import torch
|
17 |
import mediapipe as mp
|
18 |
-
from voice_analysis import process_audio
|
19 |
-
from pydub import AudioSegment
|
20 |
|
21 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
22 |
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
|
@@ -24,6 +22,7 @@ mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_fac
|
|
24 |
mp_face_mesh = mp.solutions.face_mesh
|
25 |
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
|
26 |
|
|
|
27 |
def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
|
28 |
os.makedirs(output_folder, exist_ok=True)
|
29 |
clip = VideoFileClip(video_path)
|
@@ -46,6 +45,7 @@ def extract_frames(video_path, output_folder, desired_fps, progress_callback=Non
|
|
46 |
clip.close()
|
47 |
return frame_count, original_fps
|
48 |
|
|
|
49 |
def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
|
50 |
embeddings_by_frame = {}
|
51 |
posture_scores_by_frame = {}
|
@@ -88,8 +88,8 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
|
|
88 |
progress((i + 1) / len(frame_files), f"Processing frame {i + 1} of {len(frame_files)}")
|
89 |
|
90 |
return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths, facial_landmarks_by_frame
|
91 |
-
|
92 |
-
|
93 |
def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
94 |
start_time = time.time()
|
95 |
output_folder = "output"
|
@@ -124,7 +124,6 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
124 |
frames_folder, aligned_faces_folder,
|
125 |
frame_count,
|
126 |
progress)
|
127 |
-
|
128 |
|
129 |
if not aligned_face_paths:
|
130 |
raise ValueError("No faces were extracted from the video.")
|
@@ -155,45 +154,40 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
155 |
try:
|
156 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
157 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
158 |
-
|
159 |
if len(X_posture) == 0:
|
160 |
raise ValueError("No valid posture data found")
|
161 |
-
|
162 |
mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
anomaly_threshold=anomaly_threshold)
|
188 |
-
mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
|
189 |
-
anomaly_threshold, color='green')
|
190 |
-
mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
|
191 |
-
|
192 |
except Exception as e:
|
193 |
print(f"Error details: {str(e)}")
|
194 |
import traceback
|
195 |
traceback.print_exc()
|
196 |
-
return (f"Error in video processing: {str(e)}",) + (None,) *
|
197 |
|
198 |
progress(1.0, "Preparing results")
|
199 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
@@ -251,18 +245,15 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
251 |
mse_histogram_posture,
|
252 |
mse_heatmap_embeddings,
|
253 |
mse_heatmap_posture,
|
254 |
-
mse_voice,
|
255 |
-
mse_plot_voice,
|
256 |
-
mse_histogram_voice,
|
257 |
-
mse_heatmap_voice,
|
258 |
-
anomaly_segments_voice,
|
259 |
face_samples["most_frequent"],
|
260 |
anomaly_faces_embeddings,
|
261 |
anomaly_frames_posture_images,
|
262 |
aligned_faces_folder,
|
263 |
-
frames_folder
|
|
|
264 |
)
|
265 |
|
|
|
266 |
def is_frontal_face(landmarks, threshold=60):
|
267 |
nose_tip = landmarks[4]
|
268 |
left_chin = landmarks[234]
|
@@ -277,6 +268,7 @@ def is_frontal_face(landmarks, threshold=60):
|
|
277 |
angle_degrees = math.degrees(angle)
|
278 |
return abs(180 - angle_degrees) < threshold
|
279 |
|
|
|
280 |
def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
|
281 |
person_data = {}
|
282 |
|
@@ -310,6 +302,7 @@ def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original
|
|
310 |
|
311 |
return df, largest_cluster
|
312 |
|
|
|
313 |
def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=100):
|
314 |
face_samples = {"most_frequent": [], "others": []}
|
315 |
for cluster_folder in sorted(os.listdir(organized_faces_folder)):
|
|
|
15 |
from facenet_pytorch import MTCNN
|
16 |
import torch
|
17 |
import mediapipe as mp
|
|
|
|
|
18 |
|
19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
20 |
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
|
|
|
22 |
mp_face_mesh = mp.solutions.face_mesh
|
23 |
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
|
24 |
|
25 |
+
|
26 |
def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
|
27 |
os.makedirs(output_folder, exist_ok=True)
|
28 |
clip = VideoFileClip(video_path)
|
|
|
45 |
clip.close()
|
46 |
return frame_count, original_fps
|
47 |
|
48 |
+
|
49 |
def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
|
50 |
embeddings_by_frame = {}
|
51 |
posture_scores_by_frame = {}
|
|
|
88 |
progress((i + 1) / len(frame_files), f"Processing frame {i + 1} of {len(frame_files)}")
|
89 |
|
90 |
return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths, facial_landmarks_by_frame
|
91 |
+
|
92 |
+
|
93 |
def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
94 |
start_time = time.time()
|
95 |
output_folder = "output"
|
|
|
124 |
frames_folder, aligned_faces_folder,
|
125 |
frame_count,
|
126 |
progress)
|
|
|
127 |
|
128 |
if not aligned_face_paths:
|
129 |
raise ValueError("No faces were extracted from the video.")
|
|
|
154 |
try:
|
155 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
156 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
157 |
+
|
158 |
if len(X_posture) == 0:
|
159 |
raise ValueError("No valid posture data found")
|
160 |
+
|
161 |
mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
|
162 |
+
|
163 |
+
progress(0.95, "Generating plots")
|
164 |
+
mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
|
165 |
+
color=GRAPH_COLORS['facial_embeddings'],
|
166 |
+
anomaly_threshold=anomaly_threshold)
|
167 |
+
|
168 |
+
mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
|
169 |
+
anomaly_threshold, color=GRAPH_COLORS['facial_embeddings'])
|
170 |
+
|
171 |
+
mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
|
172 |
+
color=GRAPH_COLORS['body_posture'],
|
173 |
+
anomaly_threshold=anomaly_threshold)
|
174 |
+
|
175 |
+
mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
|
176 |
+
anomaly_threshold, color=GRAPH_COLORS['body_posture'])
|
177 |
+
|
178 |
+
mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
|
179 |
+
|
180 |
+
mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
|
181 |
+
|
182 |
+
# Create video with heatmap
|
183 |
+
heatmap_video_path = os.path.join(output_folder, "video_with_heatmap.mp4")
|
184 |
+
create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_voice, heatmap_video_path)
|
185 |
+
|
|
|
|
|
|
|
|
|
|
|
186 |
except Exception as e:
|
187 |
print(f"Error details: {str(e)}")
|
188 |
import traceback
|
189 |
traceback.print_exc()
|
190 |
+
return (f"Error in video processing: {str(e)}",) + (None,) * 15
|
191 |
|
192 |
progress(1.0, "Preparing results")
|
193 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
|
|
245 |
mse_histogram_posture,
|
246 |
mse_heatmap_embeddings,
|
247 |
mse_heatmap_posture,
|
|
|
|
|
|
|
|
|
|
|
248 |
face_samples["most_frequent"],
|
249 |
anomaly_faces_embeddings,
|
250 |
anomaly_frames_posture_images,
|
251 |
aligned_faces_folder,
|
252 |
+
frames_folder,
|
253 |
+
heatmap_video_path
|
254 |
)
|
255 |
|
256 |
+
|
257 |
def is_frontal_face(landmarks, threshold=60):
|
258 |
nose_tip = landmarks[4]
|
259 |
left_chin = landmarks[234]
|
|
|
268 |
angle_degrees = math.degrees(angle)
|
269 |
return abs(180 - angle_degrees) < threshold
|
270 |
|
271 |
+
|
272 |
def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
|
273 |
person_data = {}
|
274 |
|
|
|
302 |
|
303 |
return df, largest_cluster
|
304 |
|
305 |
+
|
306 |
def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=100):
|
307 |
face_samples = {"most_frequent": [], "others": []}
|
308 |
for cluster_folder in sorted(os.listdir(organized_faces_folder)):
|