File size: 885 Bytes
df07554
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os

# Number embedded in the names of the pair list read below and of the
# per-split files this script exports.
# NOTE(review): presumably a CTC-related scale factor -- confirm.
CTC_SCALE = 2
# Directory that holds the split listing files named in `filenames`.
lrs2_dirpath = '/media/milselarch/47FC4BC577667AAD/LRS2'
# Text file listing one accepted entry per line.
valid_lrs2_filepath = f'../data/LRS2-CTC{CTC_SCALE}-valid-pairs.txt'
filenames = ['train.txt', 'test.txt', 'val.txt']

# Load the accepted entries into a set so membership checks are O(1).
# The context manager closes the file handle as soon as it has been read
# instead of leaving that to the garbage collector.
with open(valid_lrs2_filepath) as valid_pairs_file:
    valid_lrs2_pairs = {line.strip() for line in valid_pairs_file}

# For every split listing, keep only the entries that appear in
# `valid_lrs2_pairs`, write them (sorted, newline-separated) to ../data,
# and print how many entries survived out of the total line count.
for filename in filenames:
    filepath = os.path.join(lrs2_dirpath, filename)
    # Read the whole listing up front; the context manager guarantees the
    # handle is closed before the export file is opened.
    with open(filepath, 'r') as listing_file:
        lines = listing_file.readlines()

    # Only the text before the first space of each line is compared; the
    # rest of the line (if any) is dropped from the exported entry.
    valid_lines = sorted(
        name
        for name in (line.split(' ', 1)[0].strip() for line in lines)
        if name in valid_lrs2_pairs
    )

    export_filename = f'../data/LRS2_CTC{CTC_SCALE}_{filename}'
    # Closing the handle explicitly ensures the output is flushed to disk;
    # no trailing newline is written after the last entry.
    with open(export_filename, 'w') as export_file:
        export_file.write('\n'.join(valid_lines))

    print(f'<<< {filename} >>>')
    print(f'VALID: {len(valid_lines)}')
    print(f'TOTAL: {len(lines)}')