voice-clone-app / src /data /download.py
hengjie yang
Initial commit: Voice Clone App with Gradio interface
9580089
raw
history blame
4.26 kB
import os
import requests
import tarfile
import zipfile
import shutil
from pathlib import Path
from tqdm import tqdm
import subprocess
def download_file(url: str, target_path: str):
    """Stream ``url`` to ``target_path`` while showing a tqdm progress bar.

    The payload is first written to ``<target_path>.part`` and only renamed to
    ``target_path`` once the transfer has finished. An interrupted or failed
    download therefore never leaves a truncated file under the final name,
    which matters because the dataset helpers in this module skip the download
    whenever the archive path already exists.

    Args:
        url: HTTP(S) address of the file to fetch.
        target_path: Destination path of the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination cannot be written.
    """
    partial_path = f"{target_path}.part"
    try:
        # (connect, read) timeouts: without them a stalled server blocks forever.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Fail loudly instead of saving an HTML error page as the dataset.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            with open(partial_path, 'wb') as file, tqdm(
                desc="Downloading",
                total=total_size,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                # 1 MiB chunks keep the per-chunk Python overhead negligible
                # for multi-gigabyte archives.
                for data in response.iter_content(chunk_size=1024 * 1024):
                    size = file.write(data)
                    pbar.update(size)
        # Publish the file under its final name only after a complete transfer.
        os.replace(partial_path, target_path)
    finally:
        # After a successful rename the partial file no longer exists, so this
        # only fires when the download failed or was interrupted.
        if os.path.exists(partial_path):
            os.remove(partial_path)
def download_vctk(target_dir: str = "data/raw"):
    """Download the VCTK corpus and lay it out as one directory per speaker.

    Steps: fetch the zip archive into ``target_dir`` (skipped when the archive
    is already there), unpack it (skipped when ``target_dir/VCTK-Corpus``
    already exists), move every speaker directory found under
    ``VCTK-Corpus/wav48`` directly into ``target_dir``, then delete the
    archive and what is left of the unpacked tree.

    Args:
        target_dir: Directory that receives the per-speaker folders; it is
            created if missing.

    Raises:
        FileNotFoundError: If the unpacked archive contains no
            ``VCTK-Corpus/wav48`` directory.
    """
    url = "https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"
    root = Path(target_dir)
    zip_path = root / "vctk.zip"
    extracted_root = root / "VCTK-Corpus"

    # Create the target directory.
    root.mkdir(parents=True, exist_ok=True)

    # Download the archive unless a previous run already fetched it.
    if not zip_path.exists():
        print("Downloading VCTK dataset...")
        download_file(url, str(zip_path))

    # Unpack the archive unless a previous run already did.
    if not extracted_root.exists():
        print("\nExtracting VCTK dataset...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(root)

    # Reorganize: one directory per speaker directly under the target dir.
    # NOTE(review): this assumes the archive unpacks to VCTK-Corpus/wav48;
    # confirm against the layout of the 0.92 release.
    vctk_dir = extracted_root / "wav48"
    if not vctk_dir.is_dir():
        raise FileNotFoundError(
            f"Expected extracted VCTK audio under {vctk_dir}, but it does not exist"
        )
    for entry in tqdm(sorted(vctk_dir.iterdir()), desc="Organizing files"):
        if not entry.is_dir():
            continue
        dst_dir = root / entry.name
        if not dst_dir.exists():
            # Move instead of copy: the source tree is deleted below anyway,
            # a move does not double the disk usage, and an interrupted run
            # cannot leave a half-copied speaker directory that the next run
            # would skip as "already present".
            shutil.move(str(entry), str(dst_dir))

    # Clean up the downloaded archive and the remaining unpacked tree.
    if zip_path.exists():
        os.remove(zip_path)
    if extracted_root.exists():
        shutil.rmtree(extracted_root)
def download_librispeech(target_dir: str = "data/raw", subset: str = "dev-clean"):
    """Download one LibriSpeech subset and lay it out per speaker.

    Steps: fetch ``<subset>.tar.gz`` into ``target_dir`` (skipped when the
    archive is already there), unpack it (skipped when
    ``target_dir/LibriSpeech`` already exists), move every speaker directory
    found under ``LibriSpeech/<subset>`` to ``target_dir/libri_<speaker>``,
    then delete the archive and what is left of the unpacked tree.

    Args:
        target_dir: Directory that receives the ``libri_*`` folders; it is
            created if missing.
        subset: Name of the subset archive to fetch, e.g. ``"dev-clean"``.

    Raises:
        FileNotFoundError: If the unpacked archive contains no
            ``LibriSpeech/<subset>`` directory.
    """
    url = f"https://www.openslr.org/resources/12/{subset}.tar.gz"
    root = Path(target_dir)
    tar_path = root / f"librispeech_{subset}.tar.gz"
    extracted_root = root / "LibriSpeech"

    # Create the target directory.
    root.mkdir(parents=True, exist_ok=True)

    # Download the archive unless a previous run already fetched it.
    if not tar_path.exists():
        print(f"Downloading LibriSpeech {subset} dataset...")
        download_file(url, str(tar_path))

    # Unpack the archive unless a previous run already did.
    if not extracted_root.exists():
        print(f"\nExtracting LibriSpeech {subset} dataset...")
        with tarfile.open(tar_path, 'r:gz') as tar:
            # Use the "data" extraction filter where the running Python
            # provides it: it refuses members that would be written outside
            # the destination (absolute paths, "..", escaping links).
            if hasattr(tarfile, "data_filter"):
                tar.extractall(root, filter="data")
            else:
                tar.extractall(root)

    # Reorganize: one "libri_<speaker>" directory directly under the target dir.
    libri_dir = extracted_root / subset
    if not libri_dir.is_dir():
        raise FileNotFoundError(
            f"Expected extracted LibriSpeech audio under {libri_dir}, but it does not exist"
        )
    for entry in tqdm(sorted(libri_dir.iterdir()), desc="Organizing files"):
        if not entry.is_dir():
            continue
        dst_dir = root / f"libri_{entry.name}"
        if not dst_dir.exists():
            # Move instead of copy: the source tree is deleted below anyway,
            # a move does not double the disk usage, and an interrupted run
            # cannot leave a half-copied speaker directory that the next run
            # would skip as "already present".
            shutil.move(str(entry), str(dst_dir))

    # Clean up the downloaded archive and the remaining unpacked tree.
    if tar_path.exists():
        os.remove(tar_path)
    if extracted_root.exists():
        shutil.rmtree(extracted_root)
def download_aishell3(target_dir: str = "data/raw"):
    """Print manual download instructions for the AISHELL-3 dataset.

    Nothing is downloaded here; the function only tells the user where to get
    the data and that it should be extracted into ``target_dir``.
    """
    instructions = (
        "AISHELL-3 dataset needs to be downloaded manually from:",
        "https://www.openslr.org/93/",
        f"Please download and extract it to {target_dir}",
    )
    for line in instructions:
        print(line)
if __name__ == "__main__":
    # Command-line entry point: choose a dataset and where to store it.
    import argparse

    parser = argparse.ArgumentParser(description="Download speech datasets")
    parser.add_argument("--dataset", type=str, choices=["vctk", "librispeech", "aishell3"],
                        required=True, help="Dataset to download")
    parser.add_argument("--target_dir", type=str, default="data/raw",
                        help="Directory to save the dataset")
    # Expose the subset parameter that download_librispeech already accepts;
    # the default keeps the previous behaviour of fetching "dev-clean".
    parser.add_argument("--subset", type=str, default="dev-clean",
                        help="LibriSpeech subset to download "
                             "(only used with --dataset librispeech)")
    args = parser.parse_args()

    if args.dataset == "vctk":
        download_vctk(args.target_dir)
    elif args.dataset == "librispeech":
        download_librispeech(args.target_dir, args.subset)
    else:
        # argparse's choices guarantee the only remaining value is "aishell3".
        download_aishell3(args.target_dir)