File size: 3,886 Bytes
df07554
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import sys
import time

import face_alignment

sys.path.append('..')

import multiprocessing
from multiprocessing import Pool

from tqdm.auto import tqdm
from Extractor import export_frames
from helpers import *

# --- Run configuration -------------------------------------------------
# RUN_PARALLEL selects between the multiprocessing pool and the plain
# sequential loop further down in this script.
RUN_PARALLEL = True
# Forwarded as `use_gpu` to every job submitted to the pool.
PARALLEL_USE_GPU = True
# Number of worker processes in the pool.
NUM_WORKERS = 12

# --- Dataset locations ---------------------------------------------------
LRS2_dir = '/home/milselarch/projects/SUTD/50-035/LRS2/lrs2_v1'
# Input: one sub-directory per group, each holding .mp4 files.
videos_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main')
# Output root: frames are written to <images_dir>/<group_id>/<basename>.
images_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main_images')

# Listing files; every stripped line is compared against
# '<group_id>/<basename>' below to decide whether a video is processed.
usable_sets_paths = [
    '../data/LRS2_CTC2_train.txt',
    '../data/LRS2_CTC2_val.txt',
    '../data/LRS2_CTC2_test.txt'
]

# Collect the names straight into a set; the `with` block guarantees each
# listing file is closed again (the bare open().readlines() leaked handles).
USABLE_VIDEO_GROUP_NAMES = set()
for usable_sets_path in usable_sets_paths:
    with open(usable_sets_path) as usable_sets_file:
        USABLE_VIDEO_GROUP_NAMES.update(
            line.strip() for line in usable_sets_file
        )

# NOTE(review): `lock` is not referenced anywhere in the visible code;
# kept in case another module or an Extractor helper relies on it.
lock = multiprocessing.Lock()
group_ids = os.listdir(videos_dir)
# (group_id, filename) pairs of every usable .mp4 that will be processed.
video_paths = []

for group_id in group_ids:
    group_dirpath = os.path.join(videos_dir, group_id)
    if not os.path.isdir(group_dirpath):
        # a stray file next to the group directories would otherwise make
        # os.listdir raise NotADirectoryError
        continue

    filenames = os.listdir(group_dirpath)

    for filename in filenames:
        if not filename.endswith('.mp4'):
            continue

        basename, _ = os.path.splitext(filename)
        video_group_name = f'{group_id}/{basename}'
        if video_group_name not in USABLE_VIDEO_GROUP_NAMES:
            continue

        video_paths.append((group_id, filename))


def process_video(
    video_path, video_images_dir, use_gpu
):
    """
    Run `export_frames` for a single video and report which paths were used.

    :param video_path: forwarded to `export_frames` as its first
        positional argument
    :param video_images_dir: forwarded to `export_frames` as its second
        positional argument
    :param use_gpu: forwarded to `export_frames` as the `use_gpu` keyword
    :return: the tuple `(video_path, video_images_dir)`, unchanged
    """
    # recycle_landmarks is always switched on for this script
    extraction_options = dict(recycle_landmarks=True, use_gpu=use_gpu)
    export_frames(video_path, video_images_dir, **extraction_options)

    processed = (video_path, video_images_dir)
    return processed


def callback(result):
    """
    Advance the module-level progress bar `pbar` by one step.

    The bar's description is overwritten with the string form of `result`
    before the counter is incremented.

    :param result: value to show as the bar description
    """
    description = str(result)
    pbar.desc = description
    pbar.update(1)

# Input paths of videos whose worker raised; filled by _report_job_error.
FAILED_VIDEO_PATHS = []


def _prepare_video_job(group_id, video_filename):
    """
    Resolve the paths for one video and make sure its output dir exists.

    :param group_id: name of the group directory below `videos_dir`
    :param video_filename: file name of the video inside that directory
    :return: tuple `(video_path, video_images_dir)`
    """
    basename, _ = os.path.splitext(video_filename)
    video_path = os.path.join(videos_dir, group_id, video_filename)
    video_images_dir = os.path.join(images_dir, group_id, basename)
    # One makedirs call replaces the exists()/mkdir() pairs: it creates the
    # group directory (and images_dir itself) when missing and does not
    # fail if the directory already exists.
    os.makedirs(video_images_dir, exist_ok=True)
    return video_path, video_images_dir


def _report_job_error(video_path, error):
    """
    Record and print a failed pool job, then advance the progress bar.

    Without an error callback, an exception raised inside a worker is only
    stored on the AsyncResult and never surfaces, because the results are
    waited on but not fetched.

    :param video_path: input path of the video whose job failed
    :param error: exception instance raised by the worker
    """
    FAILED_VIDEO_PATHS.append(video_path)
    print('JOB FAILED', video_path, repr(error), file=sys.stderr)
    # count the failed job as handled so the bar can still reach its total
    pbar.update(1)


# The guard keeps worker processes that re-import this module (spawn and
# forkserver start methods) from creating a pool of their own.
if __name__ == '__main__':
    pbar = tqdm(video_paths)

    try:
        if RUN_PARALLEL:
            # Leaving the `with` block terminates the pool, so workers are
            # also cleaned up when submission or waiting is interrupted.
            with Pool(processes=NUM_WORKERS) as pool:
                jobs = []

                for data_pair in video_paths:
                    group_id, video_filename = data_pair
                    pbar.desc = str(data_pair)

                    video_path, video_images_dir = _prepare_video_job(
                        group_id, video_filename
                    )
                    job_kwargs = kwargify(
                        video_path=video_path,
                        video_images_dir=video_images_dir,
                        use_gpu=PARALLEL_USE_GPU
                    )
                    job = pool.apply_async(
                        process_video, kwds=job_kwargs,
                        callback=callback,
                        # bind the current path as a default argument so
                        # each job reports its own video
                        error_callback=(
                            lambda error, failed_path=video_path:
                            _report_job_error(failed_path, error)
                        )
                    )

                    jobs.append(job)

                print('ALL JOBS ADDED', len(jobs))
                # Wait for all tasks to complete
                for job in jobs:
                    job.wait()

                pool.close()
                pool.join()
        else:
            for data_pair in pbar:
                group_id, video_filename = data_pair
                video_path, video_images_dir = _prepare_video_job(
                    group_id, video_filename
                )

                process_video(
                    video_path=video_path,
                    video_images_dir=video_images_dir,
                    use_gpu=True
                )
    finally:
        pbar.close()

    print('TOTAL VIDEOS', len(video_paths))
    if FAILED_VIDEO_PATHS:
        print('FAILED VIDEOS', len(FAILED_VIDEO_PATHS))
    print('>>>')