# Spaces: Running on Zero
import argparse | |
import json | |
import os | |
import soundfile as sf | |
from tqdm import tqdm | |
from rich import print | |
def _frame_count(path):
    """Return the number of frames of the audio file at *path*."""
    # Only the file header is inspected, so the samples are never decoded
    # into memory just to measure their length.
    return sf.info(path).frames


def preprocess_one_dir(in_data_dir, out_dir, data_type):
    """Create the .json index files for one condition.

    Walks ``<in_data_dir>/<data_type>`` recursively. For every file whose
    name starts with "mix" and ends with ".wav", records the mixture and its
    two companion sources, whose paths are obtained by substituting "mix"
    with "spk1_reverb" and "spk2_reverb". Three lists of
    ``(absolute_path, number_of_frames)`` entries are written to
    ``<out_dir>/<data_type>/mix.json``, ``s1.json`` and ``s2.json``; entry
    ``i`` of each list belongs to the same mixture.

    Args:
        in_data_dir: Root directory containing one sub-directory per split.
        out_dir: Root directory the .json files are written under.
        data_type: Name of the split sub-directory to process.

    Raises:
        Whatever ``soundfile`` raises when a mixture or one of its companion
        files is missing or unreadable.
    """
    mix_infos = []
    s1_infos = []
    s2_infos = []
    in_dir = os.path.abspath(os.path.join(in_data_dir, data_type))
    print("Process {} set...".format(data_type))
    for root, dirs, files in os.walk(in_dir):
        # Sorting in place makes os.walk descend in a stable order, so the
        # generated indexes are reproducible across runs and file systems.
        dirs.sort()
        for file in sorted(files):
            if not (file.endswith(".wav") and file.startswith("mix")):
                continue
            mix_path = os.path.join(root, file)
            # Substitute only inside the part of the path below the split
            # directory: the caller-supplied prefix may itself contain
            # "mix" or "spk1_reverb" and must never be rewritten.
            mix_rel = os.path.relpath(mix_path, in_dir)
            s1_rel = mix_rel.replace("mix", "spk1_reverb")
            s2_rel = s1_rel.replace("spk1_reverb", "spk2_reverb")
            s1_path = os.path.join(in_dir, s1_rel)
            s2_path = os.path.join(in_dir, s2_rel)

            mix_infos.append((mix_path, _frame_count(mix_path)))
            s1_infos.append((s1_path, _frame_count(s1_path)))
            s2_infos.append((s2_path, _frame_count(s2_path)))
            print("Process num: {}".format(len(mix_infos)), end="\r")

    split_out_dir = os.path.join(out_dir, data_type)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(split_out_dir, exist_ok=True)
    outputs = (
        ("mix.json", mix_infos),
        ("s1.json", s1_infos),
        ("s2.json", s2_infos),
    )
    for json_name, infos in outputs:
        with open(os.path.join(split_out_dir, json_name), "w") as f:
            json.dump(infos, f, indent=4)
def preprocess_lrs2_audio(inp_args):
    """Build the .json index files for every dataset split.

    Calls ``preprocess_one_dir`` once for each of "train", "val" and "test",
    in that order, reading from ``inp_args.in_dir`` and writing under
    ``inp_args.out_dir``.
    """
    splits = ("train", "val", "test")
    for split in splits:
        preprocess_one_dir(inp_args.in_dir, inp_args.out_dir, split)
if __name__ == "__main__":
    # Command-line entry point: parse the input/output directories and build
    # the .json indexes for all splits.
    # The title is passed as ``description``; given positionally it would
    # become ``prog`` and replace the program name in the usage line.
    parser = argparse.ArgumentParser(description="LRS2 audio data preprocessing")
    # Both directories are required: without them the path joins further
    # down would fail on None with an unhelpful TypeError.
    parser.add_argument(
        "--in_dir",
        type=str,
        required=True,
        help="Directory path of audio including train, val and test",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        required=True,
        help="Directory path to put output files",
    )
    args = parser.parse_args()
    print(args)
    preprocess_lrs2_audio(args)