import os.path
import sys

sys.path.append('..')

import options

import cv2
import dlib
import numpy as np
import matplotlib.pyplot as plt

from tqdm.auto import tqdm
from multiprocessing import Pool

# Pretrained 68-point facial landmark predictor (iBUG 300-W annotation
# scheme); points 48-67 cover the mouth region.
predictor_path = '../pretrain/shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(predictor_path)
detector = dlib.get_frontal_face_detector()

RUN_PARALLEL = True
FORCE_RATIO = True
BORDER = 10

base = os.path.abspath('..')
image_dir = os.path.join(base, options.images_dir)
anno_dir = os.path.join(base, options.alignments_dir)
crop_dir = os.path.join(base, options.crop_images_dir)


def get_mouth_marks(shape):
    """Collect the 20 mouth landmarks (points 48-67) from a dlib shape and
    return their bounding box as (X_left, Y_left, X_right, Y_right)."""
    marks = np.zeros((2, 20))

    # X and Y coordinates of each mouth landmark are stored separately.
    for co, ii in enumerate(range(48, 68)):
        point = shape.part(ii)
        marks[0, co] = point.x
        marks[1, co] = point.y

    mins = np.amin(marks, axis=1)
    maxs = np.amax(marks, axis=1)
    X_left, Y_left = int(mins[0]), int(mins[1])
    X_right, Y_right = int(maxs[0]), int(maxs[1])

    return X_left, Y_left, X_right, Y_right
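

# predictor, detector, BORDER, and FORCE_RATIO are configured above but never
# used in this verification pass. As a minimal sketch of how they could combine
# with get_mouth_marks to crop a single frame -- the helper name and the
# square-crop reading of FORCE_RATIO are assumptions, not the project's actual
# extraction code:
def crop_mouth_from_frame(frame):
    """Return the mouth region of one frame, or None when no face is found."""
    rects = detector(frame, 1)
    if len(rects) == 0:
        return None

    shape = predictor(frame, rects[0])
    X_left, Y_left, X_right, Y_right = get_mouth_marks(shape)

    if FORCE_RATIO:
        # Assumed meaning of FORCE_RATIO: pad the shorter side to a square box.
        width = X_right - X_left
        height = Y_right - Y_left
        if width > height:
            pad = (width - height) // 2
            Y_left, Y_right = Y_left - pad, Y_right + pad
        else:
            pad = (height - width) // 2
            X_left, X_right = X_left - pad, X_right + pad

    # Add BORDER pixels of context and clamp to the frame bounds.
    h, w = frame.shape[:2]
    y0, y1 = max(Y_left - BORDER, 0), min(Y_right + BORDER, h)
    x0, x1 = max(X_left - BORDER, 0), min(X_right + BORDER, w)
    return frame[y0:y1, x0:x1]

# Usage sketch: mouth = crop_mouth_from_frame(cv2.imread('frame.jpg'))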


translate_pairs = []

# Build the worklist: every (speaker, sentence) pair for speakers s1..s34 that
# has both an image directory and an alignment file.
for speaker_no in range(1, 35):
    speaker_name = f's{speaker_no}'
    speaker_image_dir = os.path.join(image_dir, speaker_name)
    speaker_crop_dir = os.path.join(crop_dir, speaker_name)
    speaker_anno_dir = os.path.join(anno_dir, speaker_name)

    if not os.path.exists(speaker_image_dir):
        continue
    os.makedirs(speaker_crop_dir, exist_ok=True)

    sentence_dirs = os.listdir(speaker_image_dir)

    for sentence in sentence_dirs:
        anno_filepath = os.path.join(speaker_anno_dir, f'{sentence}.align')
        if not os.path.exists(anno_filepath):
            continue

        translate_pairs.append((speaker_no, sentence))


print('PAIRS', len(translate_pairs))

bad_sentences = set()
bad_filepaths = []


def extract_mouth_image(speaker_no, sentence):
    """Check one (speaker, sentence) pair and report every source frame whose
    cropped mouth image is missing.

    Despite the name, this pass only verifies earlier extraction output; it
    records missing crops rather than producing them. The missing filepaths
    are returned so callers (including parallel workers) can aggregate them.
    """
    speaker_name = f's{speaker_no}'
    speaker_image_dir = os.path.join(image_dir, speaker_name)
    speaker_crop_dir = os.path.join(crop_dir, speaker_name)

    img_sentence_dir = os.path.join(speaker_image_dir, sentence)
    crop_sentence_dir = os.path.join(speaker_crop_dir, sentence)
    filenames = os.listdir(img_sentence_dir)

    os.makedirs(crop_sentence_dir, exist_ok=True)

    missing = []
    for filename in filenames:
        img_filepath = os.path.join(img_sentence_dir, filename)
        if not img_filepath.endswith('.jpg'):
            continue

        crop_filepath = os.path.join(crop_sentence_dir, filename)
        if not os.path.exists(crop_filepath):
            missing.append(crop_filepath)
            print('BAD FILEPATH', (speaker_no, sentence, filename))

    return missing


for translate_pair in tqdm(translate_pairs):
    missing = extract_mouth_image(*translate_pair)
    if missing:
        bad_filepaths.extend(missing)
        bad_sentences.add(translate_pair)
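

# RUN_PARALLEL and the Pool import above suggest a multiprocessing variant,
# but the loop above runs serially, and mutating module-level globals from
# worker processes would silently lose results. A minimal sketch of how the
# flag could be honored -- an assumption, not the project's actual code:
def run_parallel(pairs, workers=4):
    """Check all pairs in a process pool and aggregate the missing crops."""
    all_missing, all_bad = [], set()
    with Pool(workers) as pool:
        # starmap unpacks each (speaker_no, sentence) tuple for the worker;
        # results come back in input order, so zip pairs them back up.
        for pair, missing in zip(pairs, pool.starmap(extract_mouth_image, pairs)):
            if missing:
                all_missing.extend(missing)
                all_bad.add(pair)
    return all_missing, all_bad

# Usage sketch (on spawn-based platforms this would also need an
# `if __name__ == '__main__':` guard):
#   if RUN_PARALLEL:
#       bad_filepaths, bad_sentences = run_parallel(translate_pairs)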


print('BAD SENTENCES', list(bad_sentences))
print('>>>')