import os.path |
import sys |
sys.path.append('..') |
import options |
import cv2 |
import dlib |
import numpy as np |
import options as opt |
import matplotlib.pyplot as plt |
from tqdm.auto import tqdm |
from multiprocessing import Pool |
# dlib models: a frontal face detector plus the landmark predictor loaded
# from the pretrained model file below.
detector = dlib.get_frontal_face_detector()
predictor_path = '../pretrain/shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(predictor_path)

# NOTE(review): BORDER is not referenced in the visible part of this script;
# presumably a crop margin in pixels -- confirm before relying on it.
BORDER = 10

# Every dataset directory named in `options` is resolved against the parent
# of the current working directory.
base = os.path.abspath('..')
image_dir, anno_dir, crop_dir = (
    os.path.join(base, sub_dir)
    for sub_dir in (
        options.images_dir,
        options.alignments_dir,
        options.crop_images_dir,
    )
)
def get_mouth_marks(shape):
    """Return the bounding box of landmark parts 48..67 of ``shape``.

    The x and y coordinates of each part are collected separately and the
    extremes along each axis are taken.

    Args:
        shape: Object exposing ``part(i)`` for ``i`` in 48..67, where each
            part has ``x`` and ``y`` attributes (presumably a dlib
            68-point landmark detection -- confirm against the caller).

    Returns:
        A tuple ``(X_left, Y_left, X_right, Y_right)`` of ``int``: the
        minimum x, minimum y, maximum x and maximum y over those parts.
    """
    points = [shape.part(idx) for idx in range(48, 68)]
    # Row 0 holds x coordinates, row 1 holds y coordinates; float dtype
    # matches the zero-initialised buffer this function used before.
    marks = np.array(
        [[pt.x for pt in points], [pt.y for pt in points]],
        dtype=float,
    )
    # Reduce once per extreme instead of recomputing for every component.
    lows = marks.min(axis=1)
    highs = marks.max(axis=1)
    return int(lows[0]), int(lows[1]), int(highs[0]), int(highs[1])
# Collect every (speaker_no, sentence) pair for which the speaker has an
# image directory and the sentence has a matching `.align` annotation file.
translate_pairs = []
for speaker_no in range(1, 35):
    speaker_name = f's{speaker_no}'
    speaker_image_dir = os.path.join(image_dir, speaker_name)
    speaker_crop_dir = os.path.join(crop_dir, speaker_name)
    speaker_anno_dir = os.path.join(anno_dir, speaker_name)

    # Speakers without an image directory are skipped entirely.
    if not os.path.exists(speaker_image_dir):
        continue

    # makedirs(exist_ok=True) replaces the exists()+mkdir() pair: it has no
    # check-then-create race and also works when the crop root is missing.
    os.makedirs(speaker_crop_dir, exist_ok=True)

    sentence_dirs = os.listdir(speaker_image_dir)
    for sentence in sentence_dirs:
        anno_filepath = os.path.join(speaker_anno_dir, f'{sentence}.align')
        # Entries without an alignment file are not queued.
        if not os.path.exists(anno_filepath):
            continue
        translate_pairs.append((speaker_no, sentence))

print('PAIRS', len(translate_pairs))

# Filled in by extract_mouth_image(): sentences with at least one missing
# crop, and the individual crop paths that were not found.
bad_sentences = set()
bad_filepaths = []
def extract_mouth_image(speaker_no, sentence):
    """Check that every ``.jpg`` frame of one sentence has a cropped copy.

    Despite the name, the visible body performs no extraction: it lists the
    sentence's image directory, makes sure the matching crop directory
    exists, and records every ``.jpg`` whose same-named file is absent from
    the crop directory.

    Missing files are appended to the module-level ``bad_filepaths`` list,
    the ``(speaker_no, sentence)`` pair is added to the module-level
    ``bad_sentences`` set, and a ``BAD FILEPATH`` line is printed.

    Args:
        speaker_no: Speaker number; the directory name is ``s<speaker_no>``.
        sentence: Name of the sentence sub-directory under the speaker.

    Returns:
        None.
    """
    speaker_name = f's{speaker_no}'
    img_sentence_dir = os.path.join(image_dir, speaker_name, sentence)
    crop_sentence_dir = os.path.join(crop_dir, speaker_name, sentence)

    # List the source frames first so a missing image directory fails
    # before anything is created on disk.
    filenames = os.listdir(img_sentence_dir)

    # exist_ok avoids the exists()/mkdir() race and also creates missing
    # parent directories instead of raising FileNotFoundError.
    os.makedirs(crop_sentence_dir, exist_ok=True)

    for filename in filenames:
        if not filename.endswith('.jpg'):
            continue
        crop_filepath = os.path.join(crop_sentence_dir, filename)
        if os.path.exists(crop_filepath):
            continue
        bad_filepaths.append(crop_filepath)
        print('BAD FILEPATH', (speaker_no, sentence, filename))
        bad_sentences.add((speaker_no, sentence))
# Run the per-sentence check over every collected pair, with a progress bar.
progress = tqdm(translate_pairs)
for translate_pair in progress:
    extract_mouth_image(*translate_pair)

# Final report: every (speaker_no, sentence) that had at least one missing crop.
print('BAD SENTENCES', list(bad_sentences))
print('>>>')