Spaces:

pupunpu
/

voice-clone-app

Sleeping

voice-clone-app / src /data /download.py

hengjie yang

Initial commit: Voice Clone App with Gradio interface

9580089 6 months ago

4.26 kB

	import os
	import requests
	import tarfile
	import zipfile
	import shutil
	from pathlib import Path
	from tqdm import tqdm
	import subprocess

	def download_file(url: str, target_path: str, chunk_size: int = 64 * 1024, timeout: float = 30.0):
	    """Stream ``url`` to ``target_path`` while showing a tqdm progress bar.

	    The body is written to a sibling ``<target_path>.part`` file first and is
	    renamed to ``target_path`` only after the transfer finished, so a failed
	    or interrupted download never leaves a truncated file at ``target_path``.

	    Args:
	        url: Address of the file to download.
	        target_path: Path the finished download is stored at.
	        chunk_size: Number of bytes requested per streamed chunk.
	        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

	    Raises:
	        requests.HTTPError: If the server replies with an error status.
	        requests.RequestException: On connection failures or timeouts.
	        OSError: If the destination file cannot be written or renamed.
	    """
	    partial_path = f"{target_path}.part"

	    # The context manager releases the connection even when an error occurs.
	    with requests.get(url, stream=True, timeout=timeout) as response:
	        # Fail loudly instead of saving an error page as if it were the data.
	        response.raise_for_status()
	        total_size = int(response.headers.get('content-length', 0))

	        try:
	            with open(partial_path, 'wb') as file, tqdm(
	                desc="Downloading",
	                total=total_size,
	                unit='iB',
	                unit_scale=True,
	                unit_divisor=1024,
	            ) as pbar:
	                for data in response.iter_content(chunk_size=chunk_size):
	                    if data:  # skip empty chunks
	                        pbar.update(file.write(data))
	        except BaseException:
	            # Remove the incomplete file (also on Ctrl+C) before re-raising.
	            if os.path.exists(partial_path):
	                os.remove(partial_path)
	            raise

	    # Publish the file only once it is complete.
	    os.replace(partial_path, target_path)

	def download_vctk(target_dir: str = "data/raw"):
	    """Download the VCTK dataset and arrange it as one directory per speaker.

	    The archive is downloaded to ``<target_dir>/vctk.zip`` and extracted into
	    ``target_dir``. Every directory found under ``VCTK-Corpus/wav48`` is then
	    moved to ``<target_dir>/<speaker>`` (existing destinations are left
	    untouched), and finally the archive and the extracted ``VCTK-Corpus``
	    tree are deleted.

	    Args:
	        target_dir: Directory that receives the speaker directories.

	    Raises:
	        FileNotFoundError: If ``VCTK-Corpus/wav48`` is missing after
	            extraction.
	    """
	    url = "https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"
	    root = Path(target_dir)
	    zip_path = root / "vctk.zip"
	    extracted_dir = root / "VCTK-Corpus"

	    # Create the target directory.
	    os.makedirs(root, exist_ok=True)

	    if not extracted_dir.exists():
	        # Download the archive; skipped when an extracted tree is already
	        # present so the archive is not fetched just to be deleted again.
	        if not zip_path.exists():
	            print("Downloading VCTK dataset...")
	            download_file(url, str(zip_path))

	        # Extract the archive.
	        print("\nExtracting VCTK dataset...")
	        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
	            zip_ref.extractall(root)

	    # Reorganise the file structure.
	    vctk_dir = extracted_dir / "wav48"
	    if not vctk_dir.is_dir():
	        # NOTE(review): this layout is what the code expects; confirm that
	        # the 0.92 archive really contains VCTK-Corpus/wav48.
	        raise FileNotFoundError(f"Expected directory not found after extraction: {vctk_dir}")

	    for speaker_dir in tqdm(os.listdir(vctk_dir), desc="Organizing files"):
	        src_dir = vctk_dir / speaker_dir
	        if not src_dir.is_dir():
	            continue
	        dst_dir = root / speaker_dir
	        if not dst_dir.exists():
	            # Move instead of copy: the extracted tree is deleted below
	            # anyway, so copying would only double disk usage and I/O.
	            shutil.move(str(src_dir), str(dst_dir))

	    # Clean up the downloaded archive and the extracted tree.
	    if zip_path.exists():
	        os.remove(zip_path)
	    if extracted_dir.exists():
	        shutil.rmtree(extracted_dir)

	def download_librispeech(target_dir: str = "data/raw", subset: str = "dev-clean"):
	    """Download one LibriSpeech subset and arrange it per speaker.

	    The archive ``<subset>.tar.gz`` is downloaded to
	    ``<target_dir>/librispeech_<subset>.tar.gz`` and extracted into
	    ``target_dir``. Every directory found under ``LibriSpeech/<subset>`` is
	    then moved to ``<target_dir>/libri_<speaker>`` (existing destinations are
	    left untouched), and finally the archive and the extracted
	    ``LibriSpeech`` tree are deleted.

	    Args:
	        target_dir: Directory that receives the ``libri_*`` directories.
	        subset: Name of the subset; used in the download URL and as the
	            directory name inside the extracted ``LibriSpeech`` tree.

	    Raises:
	        FileNotFoundError: If ``LibriSpeech/<subset>`` is missing after
	            extraction.
	    """
	    url = f"https://www.openslr.org/resources/12/{subset}.tar.gz"
	    root = Path(target_dir)
	    tar_path = root / f"librispeech_{subset}.tar.gz"
	    extracted_dir = root / "LibriSpeech"

	    # Create the target directory.
	    os.makedirs(root, exist_ok=True)

	    if not extracted_dir.exists():
	        # Download the archive; skipped when an extracted tree is already
	        # present so the archive is not fetched just to be deleted again.
	        if not tar_path.exists():
	            print(f"Downloading LibriSpeech {subset} dataset...")
	            download_file(url, str(tar_path))

	        # Extract the archive.
	        print(f"\nExtracting LibriSpeech {subset} dataset...")
	        with tarfile.open(tar_path, 'r:gz') as tar:
	            if hasattr(tarfile, "data_filter"):
	                # Refuse members that would escape the target directory;
	                # also avoids the deprecation warning on Python 3.12+.
	                tar.extractall(root, filter="data")
	            else:
	                # Older interpreters without extraction filters.
	                tar.extractall(root)

	    # Reorganise the file structure.
	    libri_dir = extracted_dir / subset
	    if not libri_dir.is_dir():
	        raise FileNotFoundError(f"Expected directory not found after extraction: {libri_dir}")

	    for speaker_dir in tqdm(os.listdir(libri_dir), desc="Organizing files"):
	        src_dir = libri_dir / speaker_dir
	        if not src_dir.is_dir():
	            continue
	        dst_dir = root / f"libri_{speaker_dir}"
	        if not dst_dir.exists():
	            # Move instead of copy: the extracted tree is deleted below
	            # anyway, so copying would only double disk usage and I/O.
	            shutil.move(str(src_dir), str(dst_dir))

	    # Clean up the downloaded archive and the extracted tree.
	    if tar_path.exists():
	        os.remove(tar_path)
	    if extracted_dir.exists():
	        shutil.rmtree(extracted_dir)

	def download_aishell3(target_dir: str = "data/raw"):
	    """Print manual download instructions for the AISHELL-3 dataset.

	    Nothing is downloaded here: the function only prints the OpenSLR page
	    and asks the user to extract the dataset into ``target_dir``. (The
	    original author's note says an OpenSLR account is required.)
	    """
	    instructions = (
	        "AISHELL-3 dataset needs to be downloaded manually from:",
	        "https://www.openslr.org/93/",
	        f"Please download and extract it to {target_dir}",
	    )
	    for line in instructions:
	        print(line)

	if __name__ == "__main__":
	    # Command-line entry point: pick a dataset and a destination directory.
	    import argparse

	    parser = argparse.ArgumentParser(description="Download speech datasets")
	    parser.add_argument("--dataset", type=str, choices=["vctk", "librispeech", "aishell3"],
	                        required=True, help="Dataset to download")
	    parser.add_argument("--target_dir", type=str, default="data/raw",
	                        help="Directory to save the dataset")
	    # Expose the subset that download_librispeech already accepts; the
	    # default matches that function's own default, so existing command
	    # lines behave exactly as before.
	    parser.add_argument("--subset", type=str, default="dev-clean",
	                        help="LibriSpeech subset to download (only used with --dataset librispeech)")
	    args = parser.parse_args()

	    if args.dataset == "vctk":
	        download_vctk(args.target_dir)
	    elif args.dataset == "librispeech":
	        download_librispeech(args.target_dir, args.subset)
	    else:
	        # Only "aishell3" remains because of the argparse choices.
	        download_aishell3(args.target_dir)