{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prepare a LibriTTS subset\n",
    "\n",
    "Selects up to `max_speakers` speakers from one LibriTTS subset and copies their audio clips and\n",
    "normalized transcripts into a flat `<dataset_path>/<speaker_id>/` layout.\n",
    "\n",
    "Run top to bottom after setting `libri_path` and `dataset_path` in the first code cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import shutil\n",
    "import sys\n",
    "\n",
    "import soundfile as sf\n",
    "\n",
    "libri_path = \"....../LibriTTS\"  # Absolute path to the LibriTTS root (the folder holding SPEAKERS.txt).\n",
    "dataset_path = \"....../libritts\"  # Change to your path. Output folder for the re-formatted dataset.\n",
    "subset = \"train-clean-100\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(os.path.join(libri_path, \"SPEAKERS.txt\")) as f:\n",
    "    data = f.readlines()\n",
    "\n",
    "dataset_info = {}\n",
    "max_speakers = 20  # Max number of speakers to train on\n",
    "min_len = 20  # Min total narration time per speaker, in minutes\n",
    "max_file_len = 11  # Max audio file length, in seconds\n",
    "min_file_len = 2  # Min audio file length, in seconds\n",
    "seed = 42  # Seed for the random speaker pick, so re-runs select the same speakers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the speakers listed for `subset` whose listed duration is at least `min_len`.\n",
    "# The first 12 lines are skipped -- presumably the comment header of SPEAKERS.txt; verify for your copy.\n",
    "possible_dataset = []\n",
    "for line in data[12:]:\n",
    "    fields = line.split(\"|\")\n",
    "    if len(fields) < 4:\n",
    "        continue  # blank or malformed line: no subset / duration columns to test\n",
    "    if fields[2].strip() == subset and float(fields[3].strip()) >= min_len:\n",
    "        possible_dataset.append(fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ids = [i[0].strip() for i in possible_dataset]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk <libri_path>/<subset>/<speaker_id>/<sub-folder>/ and, for every candidate speaker,\n",
    "# collect the clips whose duration lies within [min_file_len, max_file_len] seconds.\n",
    "possible_map = {}\n",
    "subset_path = os.path.join(libri_path, subset)\n",
    "candidate_ids = set(ids)  # set lookup instead of scanning the list for every directory\n",
    "for i in os.listdir(subset_path):\n",
    "    if i not in candidate_ids:\n",
    "        continue\n",
    "    id_path = os.path.join(subset_path, i)\n",
    "    id_dur = 0\n",
    "    id_included = []\n",
    "\n",
    "    for k in os.listdir(id_path):\n",
    "        sub_path = os.path.join(id_path, k)\n",
    "        if not os.path.isdir(sub_path):\n",
    "            continue  # a stray file at this level cannot be listed\n",
    "        for j in os.listdir(sub_path):\n",
    "            if not j.endswith(\".wav\"):\n",
    "                continue\n",
    "            f_path = os.path.join(sub_path, j)\n",
    "            # Context manager releases the file handle right away instead of leaking one per clip.\n",
    "            with sf.SoundFile(f_path) as sf_file:\n",
    "                dur = sf_file.frames / sf_file.samplerate  # seconds\n",
    "            if min_file_len <= dur <= max_file_len:\n",
    "                id_included.append(f_path)\n",
    "                id_dur += dur\n",
    "    possible_map[i] = {\"dur\": id_dur, \"included\": id_included}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only speakers that still have at least `min_len` minutes of audio after the clip-length filter.\n",
    "poss_speakers = {k: v[\"included\"] for k, v in possible_map.items() if v[\"dur\"] / 60 >= min_len}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sort first: os.listdir order is arbitrary, so the seed alone would not make the pick repeatable.\n",
    "to_move = sorted(poss_speakers)\n",
    "random.Random(seed).shuffle(to_move)\n",
    "to_move = to_move[:max_speakers]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy every selected speaker's clips and transcripts into <dataset_path>/<speaker_id>/.\n",
    "for sp_id in to_move:\n",
    "    speaker_dir = os.path.join(dataset_path, sp_id)\n",
    "    os.makedirs(speaker_dir, exist_ok=True)\n",
    "    for wav_path in poss_speakers[sp_id]:\n",
    "        f_name = os.path.basename(wav_path)\n",
    "        text_f_name = os.path.splitext(f_name)[0] + \".txt\"\n",
    "        shutil.copy(wav_path, os.path.join(speaker_dir, f_name))\n",
    "        # The transcript is read from <name>.normalized.txt next to the audio and stored as <name>.txt.\n",
    "        # splitext only touches the final extension, unlike str.replace on the whole path.\n",
    "        shutil.copy(os.path.splitext(wav_path)[0] + \".normalized.txt\", os.path.join(speaker_dir, text_f_name))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}