import os

import cv2
import numpy as np
from mtcnn import MTCNN
from tqdm import tqdm

# Detections below this confidence, or narrower/shorter than this many
# pixels, are ignored on full-detection frames.
_MIN_CONFIDENCE = 0.85
_MIN_FACE_SIDE = 50
# Every saved crop is resized to this (width, height).
_OUTPUT_SIZE = (224, 224)


def normalize_frame(frame, mean, std):
    """Scale *frame* by 1/255 and standardize it per channel.

    Args:
        frame: Image array whose last axis has 3 channels.
        mean: Three per-channel means, applied after the 1/255 scaling.
        std: Three per-channel standard deviations.

    Returns:
        A float array ``(frame / 255.0 - mean) / std`` with the same shape
        as *frame*.
    """
    scaled = frame / 255.0
    # Reshape to (1, 1, 3) so the statistics broadcast over height and width.
    channel_mean = np.array(mean).reshape(1, 1, 3)
    channel_std = np.array(std).reshape(1, 1, 3)
    return (scaled - channel_mean) / channel_std


def _save_face_crop(frame, box, padding_percentage, mean, std, face_path):
    """Crop a padded *box* from *frame*, resize, normalize and save it.

    Returns True when a file was written, False when the padded box does not
    overlap the frame (nothing is written in that case).
    """
    x, y, w, h = box
    padding = max(1, int(min(w, h) * padding_percentage))
    frame_h, frame_w = frame.shape[:2]
    x1 = max(0, x - padding)
    y1 = max(0, y - padding)
    # Clamp the upper bounds at 0 as well: a negative bound would be read as
    # "count from the end" by NumPy slicing and yield a bogus non-empty crop
    # for boxes lying entirely off the left/top edge.
    x2 = max(0, min(frame_w, x + w + padding))
    y2 = max(0, min(frame_h, y + h + padding))

    crop = frame[y1:y2, x1:x2]
    if crop.size == 0:
        return False

    # NOTE(review): the crop comes from the BGR frame returned by
    # VideoCapture, while the default mean/std of detect_faces_in_video look
    # like RGB-ordered statistics -- confirm the intended channel order.
    resized = cv2.resize(crop, _OUTPUT_SIZE)
    np.save(face_path, normalize_frame(resized, mean, std))
    return True


def _process_detection_frame(frame, frame_count, detector, output_dir,
                             padding_percentage, mean, std):
    """Run MTCNN on *frame*, save accepted faces, and start a tracker for each.

    Returns a ``(saved_paths, trackers)`` pair.
    """
    saved_paths = []
    trackers = []
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    for i, face in enumerate(detector.detect_faces(rgb_frame)):
        if face['confidence'] < _MIN_CONFIDENCE:
            continue
        x, y, w, h = face['box']
        if w < _MIN_FACE_SIDE or h < _MIN_FACE_SIDE:
            continue

        face_path = os.path.join(
            output_dir, f"frame_{frame_count:05d}_face_{i}.npy")
        if not _save_face_crop(frame, (x, y, w, h), padding_percentage,
                               mean, std, face_path):
            continue
        saved_paths.append(face_path)

        tracker = cv2.TrackerCSRT_create()
        tracker.init(frame, (x, y, w, h))
        trackers.append(tracker)
    return saved_paths, trackers


def _process_tracked_frame(frame, frame_count, trackers, output_dir,
                           padding_percentage, mean, std):
    """Update every tracker on *frame* and save a crop for each success.

    Returns the list of paths written for this frame.
    """
    saved_paths = []
    # NOTE(review): ``i`` is the tracker's position in the list, which differs
    # from the ``face_{i}`` detection index whenever a detection was filtered
    # out -- confirm whether the two are meant to identify the same face.
    for i, tracker in enumerate(trackers):
        success, box = tracker.update(frame)
        if not success:
            continue
        face_path = os.path.join(
            output_dir, f"frame_{frame_count:05d}_track_{i}.npy")
        if _save_face_crop(frame, [int(v) for v in box], padding_percentage,
                           mean, std, face_path):
            saved_paths.append(face_path)
    return saved_paths


def detect_faces_in_video(video_path, output_dir, padding_percentage=0.3,
                          mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225),
                          full_detection_interval=10):
    """Extract normalized face crops from a video and save them as .npy files.

    MTCNN detection runs on every ``full_detection_interval``-th frame; on the
    frames in between, the faces found by the latest detection are followed
    with CSRT trackers. Each crop is padded, resized to 224x224, normalized
    with :func:`normalize_frame` and written to *output_dir* as
    ``frame_{n:05d}_face_{i}.npy`` (detection frames) or
    ``frame_{n:05d}_track_{i}.npy`` (tracked frames).

    Args:
        video_path: Path of the video to read.
        output_dir: Directory for the saved arrays; created if missing.
        padding_percentage: Padding added on every side of a box, as a
            fraction of the box's shorter side (at least 1 pixel).
        mean: Per-channel means passed to :func:`normalize_frame`.
        std: Per-channel standard deviations passed to
            :func:`normalize_frame`.
        full_detection_interval: Run full detection every this many frames;
            must be positive.

    Returns:
        List of the file paths written, in processing order.

    Raises:
        ValueError: If *full_detection_interval* is not positive.
        OSError: If the video cannot be opened.
    """
    if full_detection_interval <= 0:
        raise ValueError(
            "full_detection_interval must be a positive number, "
            f"got {full_detection_interval!r}")

    os.makedirs(output_dir, exist_ok=True)
    detector = MTCNN()
    cap = cv2.VideoCapture(video_path)

    cropped_faces = []
    trackers = []
    frame_count = 0
    try:
        if not cap.isOpened():
            raise OSError(f"Error: Unable to open video file {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Some containers/streams report 0 or a negative frame count; let tqdm
        # run without a known total in that case.
        progress_total = total_frames if total_frames > 0 else None

        with tqdm(total=progress_total, desc="Extracting faces",
                  unit="frame") as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                if frame is None:
                    print(f"[WARNING] Empty frame at {frame_count}")
                elif frame_count % full_detection_interval == 0:
                    saved, trackers = _process_detection_frame(
                        frame, frame_count, detector, output_dir,
                        padding_percentage, mean, std)
                    cropped_faces.extend(saved)
                else:
                    cropped_faces.extend(_process_tracked_frame(
                        frame, frame_count, trackers, output_dir,
                        padding_percentage, mean, std))

                # Count empty frames too, so the index used in file names
                # stays aligned with the position in the video.
                frame_count += 1
                pbar.update(1)
    finally:
        # Release the capture even when detection, tracking or saving raises.
        cap.release()

    return cropped_faces