# Frame-extraction driver for the LRS2 dataset: exports per-frame images
# (via Extractor.export_frames, with landmark recycling) for every video
# listed in the train/val/test split files, optionally in parallel.
import sys
import time
import face_alignment
sys.path.append('..')
import multiprocessing
from multiprocessing import Pool
from tqdm.auto import tqdm
from Extractor import export_frames
from helpers import *
# --- Configuration -----------------------------------------------------------
RUN_PARALLEL = True        # extract with a worker pool instead of serially
PARALLEL_USE_GPU = True    # pool workers run face alignment on the GPU
NUM_WORKERS = 12           # size of the multiprocessing pool

LRS2_dir = '/home/milselarch/projects/SUTD/50-035/LRS2/lrs2_v1'
videos_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main')
images_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main_images')

# Text files listing the '<group_id>/<basename>' entry of each data split.
usable_sets_paths = [
    '../data/LRS2_CTC2_train.txt',
    '../data/LRS2_CTC2_val.txt',
    '../data/LRS2_CTC2_test.txt'
]

# Union of all split entries; only these videos get their frames extracted.
# Fix: open each split file with a context manager so the handle is closed
# (the original used bare `open(...).readlines()`, leaking file objects),
# and build the set directly instead of via an intermediate list.
USABLE_VIDEO_GROUP_NAMES = set()
for usable_sets_path in usable_sets_paths:
    with open(usable_sets_path) as split_file:
        USABLE_VIDEO_GROUP_NAMES.update(
            line.strip() for line in split_file
        )

lock = multiprocessing.Lock()
# Collect a (group_id, filename) pair for every .mp4 under videos_dir whose
# '<group_id>/<basename>' key appears in one of the usable data splits.
video_paths = []
for group_id in os.listdir(videos_dir):
    group_dir = os.path.join(videos_dir, group_id)
    for entry in os.listdir(group_dir):
        if not entry.endswith('.mp4'):
            continue
        stem = os.path.splitext(entry)[0]
        if f'{group_id}/{stem}' in USABLE_VIDEO_GROUP_NAMES:
            video_paths.append((group_id, entry))
def process_video(
    video_path, video_images_dir, use_gpu
):
    """Extract the frames of one video into its image directory.

    Thin wrapper over `Extractor.export_frames` (with landmark
    recycling enabled); returns the (video_path, video_images_dir)
    pair so the pool callback can report which video finished.
    """
    export_frames(
        video_path, video_images_dir,
        use_gpu=use_gpu, recycle_landmarks=True,
    )
    result = (video_path, video_images_dir)
    return result
def callback(result):
    """Pool-completion callback: label the bar with the finished job's
    result and advance it by one.

    `result` is the (video_path, video_images_dir) pair returned by
    `process_video`; `pbar` is the module-level tqdm progress bar.
    """
    label = str(result)
    pbar.desc = label
    pbar.update(1)
def _ensure_video_dirs(group_id, video_filename):
    """Build the source/destination paths for one video and create the
    destination directories.

    Returns (video_path, video_images_dir).
    """
    basename, _ = os.path.splitext(video_filename)
    video_path = os.path.join(videos_dir, group_id, video_filename)
    group_images_dir = os.path.join(images_dir, group_id)
    video_images_dir = os.path.join(group_images_dir, basename)
    # makedirs(exist_ok=True) replaces the racy exists()+mkdir pairs of the
    # original and creates both directory levels in one call.
    os.makedirs(video_images_dir, exist_ok=True)
    return video_path, video_images_dir


if RUN_PARALLEL:
    # Fan the extraction jobs out over a worker pool; `callback` advances
    # the progress bar as each job completes.
    pbar = tqdm(video_paths)
    pool = Pool(processes=NUM_WORKERS)
    jobs = []

    for data_pair in video_paths:
        group_id, video_filename = data_pair
        pbar.desc = str(data_pair)
        video_path, video_images_dir = _ensure_video_dirs(
            group_id, video_filename
        )
        job_kwargs = kwargify(
            video_path=video_path,
            video_images_dir=video_images_dir,
            use_gpu=PARALLEL_USE_GPU
        )
        job = pool.apply_async(
            process_video, kwds=job_kwargs,
            callback=callback
        )
        jobs.append(job)

    print('ALL JOBS ADDED', len(jobs))
    # Wait for all tasks to complete before shutting the pool down.
    for job in jobs:
        job.wait()
    pool.close()
    pool.join()
else:
    # Serial fallback: extract each video in-process, one at a time.
    pbar = tqdm(video_paths)
    for group_id, video_filename in pbar:
        video_path, video_images_dir = _ensure_video_dirs(
            group_id, video_filename
        )
        process_video(
            video_path=video_path,
            video_images_dir=video_images_dir,
            use_gpu=True
        )
# Final summary: how many videos were queued for extraction.
video_count = len(video_paths)
print('TOTAL VIDEOS', video_count)
print('>>>')