# torchnet/scripts/verify_crop_lips.py
import os
import sys

sys.path.append('..')

import cv2
import dlib
import numpy as np
import options

from tqdm.auto import tqdm
from multiprocessing import Pool

# dlib face detector and 68-point landmark predictor (loaded here, though
# this verification pass never runs detection; it only checks file existence)
predictor_path = '../pretrain/shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(predictor_path)
detector = dlib.get_frontal_face_detector()

RUN_PARALLEL = True  # presumably meant to gate multiprocessing (see sketch at end)
FORCE_RATIO = True   # crop-pipeline setting; unused in this verification pass
BORDER = 10          # padding in pixels around the detected mouth bounding box

base = os.path.abspath('..')
image_dir = os.path.join(base, options.images_dir)
anno_dir = os.path.join(base, options.alignments_dir)
crop_dir = os.path.join(base, options.crop_images_dir)
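
# Expected directory layout, inferred from the loops below:
#   <base>/<images_dir>/s<N>/<sentence>/<frame>.jpg        source video frames
#   <base>/<alignments_dir>/s<N>/<sentence>.align          word alignments
#   <base>/<crop_images_dir>/s<N>/<sentence>/<frame>.jpg   cropped lip images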

def get_mouth_marks(shape):
    """Return the mouth bounding box (x_left, y_left, x_right, y_right).

    Landmarks 48-67 in the 68-point dlib model are the mouth points;
    their x and y coordinates are stored in separate rows of `marks`.
    """
    marks = np.zeros((2, 20))

    for co, ii in enumerate(range(48, 68)):
        point = shape.part(ii)
        marks[0, co] = point.x
        marks[1, co] = point.y

    # extreme points: top-left (min x, min y) and bottom-right (max x, max y)
    X_left, Y_left = np.amin(marks, axis=1).astype(int)
    X_right, Y_right = np.amax(marks, axis=1).astype(int)
    return X_left, Y_left, X_right, Y_right
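
# Hypothetical usage of get_mouth_marks (an assumption, not part of this
# script; the companion crop script presumably does something like this):
#
#   frame = cv2.imread(img_filepath)          # BGR frame from disk
#   faces = detector(frame, 1)                # dlib face rectangles
#   shape = predictor(frame, faces[0])        # 68 facial landmarks
#   x1, y1, x2, y2 = get_mouth_marks(shape)
#   mouth = frame[y1 - BORDER:y2 + BORDER, x1 - BORDER:x2 + BORDER]
#
# (dlib expects RGB input; cv2 loads BGR, so a cv2.cvtColor step may be needed.)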

# collect (speaker, sentence) pairs that have both source images
# and an alignment file; speakers are named s1 through s34
translate_pairs = []

for speaker_no in range(1, 35):
    speaker_name = f's{speaker_no}'
    speaker_image_dir = os.path.join(image_dir, speaker_name)
    speaker_crop_dir = os.path.join(crop_dir, speaker_name)
    speaker_anno_dir = os.path.join(anno_dir, speaker_name)

    if not os.path.exists(speaker_image_dir):
        continue
    if not os.path.exists(speaker_crop_dir):
        os.mkdir(speaker_crop_dir)

    sentence_dirs = os.listdir(speaker_image_dir)
    for sentence in sentence_dirs:
        anno_filepath = os.path.join(speaker_anno_dir, f'{sentence}.align')
        if not os.path.exists(anno_filepath):
            continue

        translate_pairs.append((speaker_no, sentence))
print('PAIRS', len(translate_pairs))
bad_sentences = set()
bad_filepaths = []

def extract_mouth_image(speaker_no, sentence):
    """Verify that every source frame has a corresponding cropped-lip image.

    Despite the name (kept from the crop script), this function extracts
    nothing: it records the crop filepaths that are missing on disk.
    """
    speaker_name = f's{speaker_no}'
    speaker_image_dir = os.path.join(image_dir, speaker_name)
    speaker_crop_dir = os.path.join(crop_dir, speaker_name)
    img_sentence_dir = os.path.join(speaker_image_dir, sentence)
    crop_sentence_dir = os.path.join(speaker_crop_dir, sentence)
    filenames = os.listdir(img_sentence_dir)

    if not os.path.exists(crop_sentence_dir):
        os.mkdir(crop_sentence_dir)

    for filename in filenames:
        img_filepath = os.path.join(img_sentence_dir, filename)
        if not img_filepath.endswith('.jpg'):
            continue

        # a source frame without a matching crop means the crop step
        # failed or was skipped for this file
        crop_filepath = os.path.join(crop_sentence_dir, filename)
        if not os.path.exists(crop_filepath):
            bad_filepaths.append(crop_filepath)
            print('BAD FILEPATH', (speaker_no, sentence, filename))
            bad_sentences.add((speaker_no, sentence))

for translate_pair in tqdm(translate_pairs):
    extract_mouth_image(*translate_pair)
print('BAD SENTENCES', list(bad_sentences))
print('>>>')
# extract_mouth_image(1, 'pbio7a')
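
# Sketch of a parallel version (an assumption: RUN_PARALLEL and Pool are
# defined above but never used). Pool workers run in separate processes and
# cannot append to this module's bad_filepaths, so the worker must *return*
# the missing paths instead of mutating globals:
#
#   def find_missing(pair):
#       speaker_no, sentence = pair
#       img_sentence_dir = os.path.join(image_dir, f's{speaker_no}', sentence)
#       crop_sentence_dir = os.path.join(crop_dir, f's{speaker_no}', sentence)
#       return [
#           os.path.join(crop_sentence_dir, name)
#           for name in os.listdir(img_sentence_dir)
#           if name.endswith('.jpg') and not os.path.exists(
#               os.path.join(crop_sentence_dir, name))
#       ]
#
#   if RUN_PARALLEL:
#       with Pool() as pool:
#           results = pool.map(find_missing, translate_pairs)
#       missing = [path for paths in results for path in paths]
#
# On spawn-based platforms (e.g. Windows) this would also need an
# `if __name__ == '__main__':` guard around the Pool usage.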