"""Extract face crops from a video: run MTCNN detection every few frames,
track the detected boxes with CSRT trackers in between, and save each
padded, resized, ImageNet-normalized crop as a .npy array."""

import os

import cv2
import numpy as np
from mtcnn import MTCNN
from tqdm import tqdm

def normalize_frame(frame, mean, std):
    """Scale pixel values to [0, 1], then apply per-channel mean/std normalization."""
    frame = frame / 255.0
    mean = np.array(mean).reshape(1, 1, 3)
    std = np.array(std).reshape(1, 1, 3)
    return (frame - mean) / std

def detect_faces_in_video(video_path, output_dir, padding_percentage=0.3,
                          mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                          full_detection_interval=10):
    """Detect faces with MTCNN every `full_detection_interval` frames, track them
    with CSRT trackers on the frames in between, and save every normalized
    224x224 crop to `output_dir` as a .npy file. Returns the list of saved paths.

    Note: cv2.TrackerCSRT_create requires the opencv-contrib-python package.
    """
    os.makedirs(output_dir, exist_ok=True)

    detector = MTCNN()
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Unable to open video file: {video_path}")

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_count = 0
    cropped_faces = []
    trackers = []

    with tqdm(total=total_frames, desc="Extracting faces", unit="frame") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame is None:
                # Keep the frame counter and progress bar in sync even when a
                # decoded frame comes back empty.
                print(f"[WARNING] Empty frame at index {frame_count}")
                frame_count += 1
                pbar.update(1)
                continue

            if frame_count % full_detection_interval == 0:
                # Full detection pass: re-detect faces with MTCNN and rebuild the
                # tracker list from scratch (MTCNN expects RGB input).
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                faces = detector.detect_faces(rgb_frame)
                trackers = []

                for i, face in enumerate(faces):
                    # Skip low-confidence detections.
                    if face['confidence'] < 0.85:
                        continue

                    x, y, w, h = face['box']
                    # MTCNN can return slightly negative coordinates; clamp them.
                    x, y = max(0, x), max(0, y)
                    # Skip faces that are too small to be useful.
                    if w < 50 or h < 50:
                        continue

                    # Pad the box, clamped to the frame boundaries.
                    padding = max(1, int(min(w, h) * padding_percentage))
                    x1 = max(0, x - padding)
                    y1 = max(0, y - padding)
                    x2 = min(rgb_frame.shape[1], x + w + padding)
                    y2 = min(rgb_frame.shape[0], y + h + padding)

                    # Crop from the RGB frame so the ImageNet (RGB) mean/std apply.
                    cropped_face = rgb_frame[y1:y2, x1:x2]
                    if cropped_face.size == 0:
                        continue

                    resized_cropped_face = cv2.resize(cropped_face, (224, 224))
                    normalized_face = normalize_frame(resized_cropped_face, mean, std)

                    face_filename = f"frame_{frame_count:05d}_face_{i}.npy"
                    face_path = os.path.join(output_dir, face_filename)
                    np.save(face_path, normalized_face)
                    cropped_faces.append(face_path)

                    # Initialize a CSRT tracker (opencv-contrib-python) so this face
                    # can be followed on the frames between full detections.
                    tracker = cv2.TrackerCSRT_create()
                    tracker.init(frame, (x, y, w, h))
                    trackers.append(tracker)
            else:
                # In-between frames: update the existing trackers instead of
                # running a full (and slower) MTCNN detection.
                for i, tracker in enumerate(trackers):
                    success, box = tracker.update(frame)
                    if not success:
                        continue

                    x, y, w, h = [int(v) for v in box]
                    # Pad the tracked box, clamped to the frame boundaries.
                    padding = max(1, int(min(w, h) * padding_percentage))
                    x1 = max(0, x - padding)
                    y1 = max(0, y - padding)
                    x2 = min(frame.shape[1], x + w + padding)
                    y2 = min(frame.shape[0], y + h + padding)

                    cropped_face = frame[y1:y2, x1:x2]
                    if cropped_face.size == 0:
                        continue

                    # Convert BGR -> RGB so the ImageNet mean/std match the channels.
                    cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_BGR2RGB)
                    resized_cropped_face = cv2.resize(cropped_face, (224, 224))
                    normalized_face = normalize_frame(resized_cropped_face, mean, std)

                    face_filename = f"frame_{frame_count:05d}_track_{i}.npy"
                    face_path = os.path.join(output_dir, face_filename)
                    np.save(face_path, normalized_face)
                    cropped_faces.append(face_path)

            frame_count += 1
            pbar.update(1)

    cap.release()
    return cropped_faces
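
# A minimal usage sketch (assumption: "input.mp4" and "extracted_faces" are
# placeholder names, not paths taken from the source). It runs the extractor
# on one clip, then loads the first saved crop back to confirm its shape.
if __name__ == "__main__":
    saved_paths = detect_faces_in_video(
        video_path="input.mp4",
        output_dir="extracted_faces",
        padding_percentage=0.3,
        full_detection_interval=10,
    )
    print(f"Saved {len(saved_paths)} face crops")
    if saved_paths:
        sample = np.load(saved_paths[0])
        print(f"First crop shape: {sample.shape}")  # expected (224, 224, 3)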