Spaces:
Paused
Paused
| import argparse | |
| import glob | |
| import multiprocessing | |
| import os | |
| import pathlib | |
| import torch | |
| from tqdm import tqdm | |
| from TTS.utils.vad import get_vad_model_and_utils, remove_silence | |
| torch.set_num_threads(1) | |
def adjust_path_and_remove_silence(audio_path):
    """Remove silence from one audio file and save it under the output directory.

    The output location mirrors the input layout: the leading
    ``args.input_dir`` prefix of ``audio_path`` is swapped for
    ``args.output_dir``. Relies on the module-level ``args`` and
    ``model_and_utils`` that the script entry point sets up.

    Args:
        audio_path: Path of the input audio file, expected to start with
            ``args.input_dir`` (as the paths produced by the glob do).

    Returns:
        A tuple ``(output_path, is_speech)``. When the output file already
        exists and ``args.force`` is not set, the file is skipped and
        ``(output_path, False)`` is returned without running the VAD.
        Otherwise the values come straight from ``remove_silence``.
    """
    input_root = os.path.join(args.input_dir, "")
    output_root = os.path.join(args.output_dir, "")
    # Swap only the first occurrence (the root prefix). An unbounded replace
    # would also rewrite nested directories that happen to repeat the input
    # root's name, e.g. "wavs/spk/wavs/a.wav" -> "out/spk/out/a.wav".
    output_path = audio_path.replace(input_root, output_root, 1)

    # ignore if the file exists
    # NOTE(review): a skipped file is reported with is_speech=False, so the
    # caller lists it among the "no speech" files - confirm this is intended.
    if os.path.exists(output_path) and not args.force:
        return output_path, False

    # create all directory structure
    pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # remove the silence and save the audio
    output_path, is_speech = remove_silence(
        model_and_utils,
        audio_path,
        output_path,
        trim_just_beginning_and_end=args.trim_just_beginning_and_end,
        use_cuda=args.use_cuda,
    )
    return output_path, is_speech
def preprocess_audios():
    """Run silence removal over every file matched under ``args.input_dir``.

    Files are discovered recursively with the ``args.glob`` pattern and then
    processed either through a ``multiprocessing.Pool`` (when
    ``args.num_processes > 1``) or one by one in this process. Output paths
    whose ``is_speech`` flag came back falsy are written, one per line, to
    ``filtered_files.txt`` inside ``args.output_dir``.

    Reads the module-level ``args``; returns nothing.
    """
    search_pattern = os.path.join(args.input_dir, args.glob)
    files = sorted(glob.glob(search_pattern, recursive=True))
    print("> Number of files: ", len(files))
    if not args.force:
        print("> Ignoring files that already exist in the output idrectory.")

    if args.trim_just_beginning_and_end:
        mode_message = "> Trimming just the beginning and the end with nonspeech parts."
    else:
        mode_message = "> Trimming all nonspeech parts."
    print(mode_message)

    # Nothing matched: report it and stop before touching the output directory.
    if not files:
        print("> No files Found !")
        return

    if args.num_processes > 1:
        with multiprocessing.Pool(processes=args.num_processes) as pool:
            progress = tqdm(
                pool.imap_unordered(adjust_path_and_remove_silence, files),
                total=len(files),
                desc="Processing audio files",
            )
            # Drain the iterator while the pool is still alive.
            results = list(progress)
    else:
        results = [adjust_path_and_remove_silence(path) for path in tqdm(files)]

    # Keep only the outputs flagged as containing no speech.
    filtered_files = [out_path for out_path, is_speech in results if not is_speech]

    # write files that do not have speech
    report_path = os.path.join(args.output_dir, "filtered_files.txt")
    with open(report_path, "w", encoding="utf-8") as report:
        report.writelines(str(entry) + "\n" for entry in filtered_files)
def str_to_bool(value):
    """Convert a command-line boolean spelling into a real ``bool``.

    ``argparse`` with ``type=bool`` treats every non-empty string as ``True``,
    so ``--use_cuda False`` would silently enable CUDA. This converter parses
    the text instead.

    Args:
        value: A ``bool`` (returned unchanged) or a string such as
            ``"True"``, ``"false"``, ``"1"``, ``"no"``. The empty string maps
            to ``False``, matching what ``bool("")`` used to give.

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value (True/False), got {!r}".format(value))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True"
    )
    parser.add_argument("-i", "--input_dir", type=str, help="Dataset root dir", required=True)
    parser.add_argument("-o", "--output_dir", type=str, help="Output Dataset dir", default="")
    parser.add_argument("-f", "--force", default=False, action="store_true", help="Force the replace of exists files")
    parser.add_argument(
        "-g",
        "--glob",
        type=str,
        default="**/*.wav",
        help="path in glob format for acess wavs from input_dir. ex: wav48/*/*.wav",
    )
    # The boolean options keep their "--flag True/False" form, but are parsed
    # with str_to_bool so that "False" really means False.
    parser.add_argument(
        "-t",
        "--trim_just_beginning_and_end",
        type=str_to_bool,
        default=True,
        help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trim. Default True",
    )
    parser.add_argument(
        "-c",
        "--use_cuda",
        type=str_to_bool,
        default=False,
        help="If True use cuda",
    )
    parser.add_argument(
        "--use_onnx",
        type=str_to_bool,
        default=False,
        help="If True use onnx",
    )
    parser.add_argument(
        "--num_processes",
        type=int,
        default=1,
        help="Number of processes to use",
    )
    args = parser.parse_args()

    # With no explicit output directory, results are written next to the inputs.
    if args.output_dir == "":
        args.output_dir = args.input_dir

    # load the model and utils
    # NOTE(review): `args` and `model_and_utils` only exist as globals when this
    # guard runs; worker processes started with the "spawn" method re-import
    # the module without executing it - confirm --num_processes > 1 works on
    # platforms that default to spawn.
    model_and_utils = get_vad_model_and_utils(use_cuda=args.use_cuda, use_onnx=args.use_onnx)
    preprocess_audios()