|
import os |
|
import cv2 |
|
import numpy as np |
|
from tqdm import tqdm |
|
from mtcnn import MTCNN |
|
|
|
def normalize_frame(frame, mean, std):
    """Rescale an image to the 0-1 range and standardise it per channel.

    Args:
        frame: Image array whose trailing axis broadcasts against three
            channel statistics (e.g. ``H x W x 3``), holding 0-255 values.
        mean: Three per-channel means, expressed on the 0-1 scale.
        std: Three per-channel standard deviations, on the 0-1 scale.

    Returns:
        A floating-point array equal to ``(frame / 255 - mean) / std``.
    """
    # Shape the statistics as (1, 1, 3) so they broadcast over rows and columns.
    channel_mean = np.asarray(mean).reshape(1, 1, 3)
    channel_std = np.asarray(std).reshape(1, 1, 3)

    unit_scaled = frame / 255.0
    return (unit_scaled - channel_mean) / channel_std
|
|
|
def _save_padded_face(frame, box, padding_percentage, mean, std, face_path,
                      output_size):
    """Crop a padded box from ``frame``, normalise it and save it as ``.npy``.

    Args:
        frame: Source image; rows are indexed by ``y`` and columns by ``x``.
        box: ``(x, y, w, h)`` box in pixel coordinates.
        padding_percentage: Fraction of the shorter box side added as padding
            on every side (at least one pixel is always added).
        mean: Per-channel means forwarded to ``normalize_frame``.
        std: Per-channel standard deviations forwarded to ``normalize_frame``.
        face_path: Destination path handed to ``np.save``.
        output_size: ``(width, height)`` the crop is resized to.

    Returns:
        True when a crop was written, False when the padded box did not
        overlap the frame (nothing is written in that case).
    """
    x, y, w, h = box
    padding = max(1, int(min(w, h) * padding_percentage))

    x1 = max(0, x - padding)
    y1 = max(0, y - padding)
    # Clamp the far edges at 0 as well as at the frame size: a box that has
    # drifted past the top/left edge would otherwise give a negative slice
    # end, which NumPy interprets as "from the end" and yields a wrong crop.
    x2 = max(0, min(frame.shape[1], x + w + padding))
    y2 = max(0, min(frame.shape[0], y + h + padding))

    cropped_face = frame[y1:y2, x1:x2]
    if cropped_face.size == 0:
        return False

    resized_face = cv2.resize(cropped_face, output_size)
    np.save(face_path, normalize_frame(resized_face, mean, std))
    return True


def detect_faces_in_video(video_path, output_dir, padding_percentage=0.3,
                          mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                          full_detection_interval=10, min_confidence=0.85,
                          min_face_size=50, output_size=(224, 224)):
    """Extract normalised face crops from a video and save them as ``.npy``.

    Every ``full_detection_interval``-th frame is run through MTCNN and a CSRT
    tracker is started for each face that is kept; on the frames in between,
    only those trackers are updated. Each kept or successfully tracked face is
    cropped with padding, resized, normalised with ``normalize_frame`` and
    written to ``output_dir`` as ``frame_<n>_face_<i>.npy`` (detection frames)
    or ``frame_<n>_track_<i>.npy`` (tracking frames).

    Crops are taken from the frame as read by OpenCV, i.e. before the
    BGR-to-RGB conversion that is applied only for detection.
    NOTE(review): the default ``mean``/``std`` look like RGB-ordered ImageNet
    statistics while the crops are BGR -- confirm the intended channel order.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory for the saved arrays; created if missing.
        padding_percentage: Fraction of the shorter box side added as padding
            around each face.
        mean: Per-channel means used for normalisation.
        std: Per-channel standard deviations used for normalisation.
        full_detection_interval: Run the full detector every this many frames;
            must be positive.
        min_confidence: Detections with a lower confidence are discarded.
        min_face_size: Detections narrower or shorter than this many pixels
            are discarded.
        output_size: ``(width, height)`` every crop is resized to.

    Returns:
        List of paths of the saved ``.npy`` files, in the order written.

    Raises:
        ValueError: If ``full_detection_interval`` is not positive.
        Exception: If the video file cannot be opened.
    """
    # Validate before creating directories or loading the detector; a zero
    # interval would otherwise surface later as a ZeroDivisionError.
    if full_detection_interval <= 0:
        raise ValueError(
            "full_detection_interval must be positive, "
            f"got {full_detection_interval!r}"
        )

    os.makedirs(output_dir, exist_ok=True)

    detector = MTCNN()
    cap = cv2.VideoCapture(video_path)
    cropped_faces = []

    try:
        if not cap.isOpened():
            raise Exception(f"Error: Unable to open video file {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_count = 0
        trackers = []

        # Some sources report a non-positive frame count; let tqdm run
        # without a total rather than with a meaningless one.
        progress_total = total_frames if total_frames > 0 else None

        with tqdm(total=progress_total, desc="Extracting faces",
                  unit="frame") as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                if frame is None:
                    # The counter is deliberately not advanced here, so the
                    # detection schedule resumes on the next readable frame.
                    print(f"[WARNING] Empty frame at {frame_count}")
                    continue

                if frame_count % full_detection_interval == 0:
                    # Detection frame: rebuild the tracker list from scratch.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    faces = detector.detect_faces(rgb_frame)
                    trackers = []

                    for i, face in enumerate(faces):
                        if face['confidence'] < min_confidence:
                            continue

                        x, y, w, h = face['box']
                        if w < min_face_size or h < min_face_size:
                            continue

                        face_filename = f"frame_{frame_count:05d}_face_{i}.npy"
                        face_path = os.path.join(output_dir, face_filename)
                        if not _save_padded_face(frame, (x, y, w, h),
                                                 padding_percentage, mean, std,
                                                 face_path, output_size):
                            continue
                        cropped_faces.append(face_path)

                        # Only faces that produced a saved crop are tracked.
                        tracker = cv2.TrackerCSRT_create()
                        tracker.init(frame, (x, y, w, h))
                        trackers.append(tracker)
                else:
                    # Tracking frame: follow the faces found at the last
                    # detection frame instead of re-running the detector.
                    for i, tracker in enumerate(trackers):
                        success, box = tracker.update(frame)
                        if not success:
                            continue

                        tracked_box = tuple(int(v) for v in box)
                        face_filename = f"frame_{frame_count:05d}_track_{i}.npy"
                        face_path = os.path.join(output_dir, face_filename)
                        if _save_padded_face(frame, tracked_box,
                                             padding_percentage, mean, std,
                                             face_path, output_size):
                            cropped_faces.append(face_path)

                frame_count += 1
                pbar.update(1)
    finally:
        # Release the capture even when detection, tracking or saving fails.
        cap.release()

    return cropped_faces
|
|