File size: 3,215 Bytes
df07554
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os.path
import sys

sys.path.append('..')

import options

import cv2
import dlib
import numpy as np
import options as opt
import matplotlib.pyplot as plt

from tqdm.auto import tqdm
from multiprocessing import Pool

# Pre-trained dlib 68-point facial-landmark model (points 48-67 = mouth).
predictor_path = '../pretrain/shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(predictor_path)
detector = dlib.get_frontal_face_detector()

# NOTE(review): RUN_PARALLEL / FORCE_RATIO / BORDER are not referenced
# anywhere in this chunk — presumably consumed by the actual cropping
# code elsewhere in the file/project; confirm before removing.
RUN_PARALLEL = True
FORCE_RATIO = True
BORDER = 10

# Dataset directories resolved relative to the parent directory, using
# the paths declared in the project-level `options` module.
base = os.path.abspath('..')
image_dir = os.path.join(base, options.images_dir)
anno_dir = os.path.join(base, options.alignments_dir)
crop_dir = os.path.join(base, options.crop_images_dir)

def get_mouth_marks(shape):
    """Return the bounding box of the mouth landmarks of a detected face.

    Args:
        shape: a dlib ``full_object_detection`` holding the 68 facial
            landmarks; indices 48-67 are the mouth region.

    Returns:
        Tuple ``(X_left, Y_left, X_right, Y_right)`` of ints — the
        top-left and bottom-right corners of the mouth bounding box.
    """
    # Collect (x, y) for the 20 mouth landmarks (indices 48-67).
    marks = np.zeros((2, 20))
    for co, ii in enumerate(range(48, 68)):
        point = shape.part(ii)
        marks[0, co] = point.x
        marks[1, co] = point.y

    # Reduce once per axis instead of recomputing amin/amax per value
    # (the original performed four full reductions).
    mins = np.amin(marks, axis=1)
    maxs = np.amax(marks, axis=1)

    return int(mins[0]), int(mins[1]), int(maxs[0]), int(maxs[1])


# Gather every (speaker, sentence) pair that has both extracted frames
# and a matching alignment file on disk.
translate_pairs = []

for spk_id in range(1, 35):
    spk_name = f's{spk_id}'
    spk_images = os.path.join(image_dir, spk_name)
    spk_crops = os.path.join(crop_dir, spk_name)
    spk_annos = os.path.join(anno_dir, spk_name)

    # Skip speakers with no extracted frames at all.
    if not os.path.exists(spk_images):
        continue

    # Ensure the per-speaker crop output directory exists.
    if not os.path.exists(spk_crops):
        os.mkdir(spk_crops)

    for sentence in os.listdir(spk_images):
        align_path = os.path.join(spk_annos, f'{sentence}.align')
        # Keep only sentences that have an alignment annotation.
        if os.path.exists(align_path):
            translate_pairs.append((spk_id, sentence))


print('PAIRS', len(translate_pairs))
# Accumulators filled by extract_mouth_image() below.
bad_sentences = set()
bad_filepaths = []


def extract_mouth_image(speaker_no, sentence):
    """Audit one (speaker, sentence) clip for missing cropped frames.

    NOTE(review): despite its name, this version performs no landmark
    detection and writes no images — it only ensures the output
    directory exists and records every frame whose crop is absent into
    the module-level ``bad_filepaths`` / ``bad_sentences`` accumulators.

    Args:
        speaker_no: integer speaker index (directory ``s{speaker_no}``).
        sentence: sentence/clip directory name within the speaker dir.
    """
    speaker_name = f's{speaker_no}'
    img_sentence_dir = os.path.join(image_dir, speaker_name, sentence)
    crop_sentence_dir = os.path.join(crop_dir, speaker_name, sentence)

    # Make sure the per-sentence crop directory exists.
    if not os.path.exists(crop_sentence_dir):
        os.mkdir(crop_sentence_dir)

    for filename in os.listdir(img_sentence_dir):
        img_filepath = os.path.join(img_sentence_dir, filename)
        # Ignore anything that is not a frame image.
        if not img_filepath.endswith('.jpg'):
            continue

        crop_filepath = os.path.join(crop_sentence_dir, filename)
        if not os.path.exists(crop_filepath):
            bad_filepaths.append(crop_filepath)
            print('BAD FILEPATH', (speaker_no, sentence, filename))
            bad_sentences.add((speaker_no, sentence))


# Audit every collected pair, creating crop directories and recording
# frames whose cropped image is missing.
for translate_pair in tqdm(translate_pairs):
    extract_mouth_image(*translate_pair)


# Summarize which (speaker, sentence) clips had missing crops.
print('BAD SENTENCES', list(bad_sentences))
print('>>>')
# extract_mouth_image(1, 'pbio7a')