for quicker and effortless model downloads\n",
"\n",
"---\n",
"\n",
"[Support](https://discord.gg/hvmsukmBHE) — [GitHub](https://github.com/ArkanDash/Advanced-RVC-Inference.git)"
],
"metadata": {
"id": "FZUxBujkr91c"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "fl7Y_WjdrEO2"
},
"outputs": [],
"source": [
"#@title Check GPU\n",
"# Prints the NVIDIA driver / GPU status so you can confirm a GPU runtime is attached.\n",
"!nvidia-smi"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "sfqNqmS-rEPK"
},
"outputs": [],
"source": [
"# @title Installation\n",
"# Clones Advanced-RVC-Inference into /content/program_infer, installs its\n",
"# requirements, then replaces torch/torchvision/torchaudio with the pinned\n",
"# builds from the PyTorch cu121 wheel index.\n",
"\n",
"import os\n",
"\n",
"from IPython.display import clear_output\n",
"\n",
"url = \"https://github.com/ArkanDash/Advanced-RVC-Inference.git\"\n",
"repo_dir = \"/content/program_infer\"\n",
"\n",
"!git clone $url $repo_dir\n",
"\n",
"# clear_output() below would wipe git's error message, so stop here (with the\n",
"# git output still visible) when the checkout is missing.\n",
"if not os.path.isdir(repo_dir):\n",
"    raise RuntimeError(f\"Cloning {url} into {repo_dir} failed - see the git output above.\")\n",
"clear_output()\n",
"\n",
"%cd $repo_dir\n",
"\n",
"!pip install -r requirements.txt\n",
"# Swap whatever torch stack requirements.txt pulled in for the pinned builds.\n",
"!pip uninstall torch torchvision torchaudio -y\n",
"!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cu121\n",
"clear_output()\n",
"print(\"Finished installing requirements!\")"
]
},
{
"cell_type": "code",
"source": [
"#@title Run WebUI\n",
"# Runs app.py (resolved against the current working directory) with the --share flag.\n",
"\n",
"\n",
"iyalah = \"app.py\"\n",
"print(\"running WebUI\")\n",
"!python $iyalah --share"
],
"metadata": {
"cellView": "form",
"id": "AJZH4XDOKnK3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Run NoUI\n",
"\n",
"\n",
"•created by [NeoDev](https://github.com/TheNeodev)•"
],
"metadata": {
"id": "MO_UV5ZhKOTF"
}
},
{
"cell_type": "code",
"source": [
"# @title Download model\n",
"# @markdown Hugging Face or Google Drive\n",
"model_link = \"https://huggingface.co/Bredvige/Sonic2/resolve/main/Sonic.zip\" # @param {type:\"string\"}\n",
"\n",
"# Hands the link to the `download` sub-command of scrpt.py in the working directory.\n",
"!python scrpt.py download --model_link \"{model_link}\""
],
"metadata": {
"cellView": "form",
"id": "qk74gqJqEB_A"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"#@title run Advanced-RVC\n",
"\n",
"import os\n",
"import sys\n",
"import yt_dlp\n",
"import subprocess\n",
"import logging\n",
"import json\n",
"from logging.handlers import RotatingFileHandler\n",
"from contextlib import suppress\n",
"import gradio as gr\n",
"import librosa\n",
"import numpy as np\n",
"import soundfile as sf\n",
"from pydub import AudioSegment\n",
"# Import the UVR separator. Ensure the module is available.\n",
"try:\n",
" from audio_separator.separator import Separator\n",
"except ImportError:\n",
" raise ImportError(\"Make sure the 'audio_separator' module is installed or in your working directory.\")\n",
"\n",
"from rvc.lib.tools.prerequisites_download import prerequisites_download_pipeline\n",
"\n",
"# Run the repository's prerequisites download step (models=True, exe=True) before the\n",
"# rest of the cell; what exactly is fetched is decided by that helper, not shown here.\n",
"if __name__ == \"__main__\":\n",
"    prerequisites_download_pipeline(models=True, exe=True)\n",
"\n",
"\n",
"# =============================================================================\n",
"# Logging Setup\n",
"# =============================================================================\n",
"\n",
"def setup_logging(log_level=logging.DEBUG, log_file=\"kuro_rvc.log\"):\n",
"    \"\"\"\n",
"    Configure the root logger with a console handler and a rotating file handler.\n",
"\n",
"    Args:\n",
"        log_level: Level applied to the root logger and to the file handler.\n",
"        log_file: Path of the log file; rotated at 5 MiB with 2 backups kept.\n",
"\n",
"    The console handler writes to sys.stdout and only emits INFO and above.\n",
"    Handlers already attached to the root logger are removed and closed, so\n",
"    re-running the notebook cell does not leave stale log-file handles open.\n",
"    \"\"\"\n",
"    logger = logging.getLogger()\n",
"    logger.setLevel(log_level)\n",
"\n",
"    # Formatter for both handlers\n",
"    formatter = logging.Formatter(\n",
"        fmt=\"%(asctime)s [%(levelname)s] %(name)s: %(message)s\",\n",
"        datefmt=\"%Y-%m-%d %H:%M:%S\"\n",
"    )\n",
"\n",
"    # Console handler (INFO level and above)\n",
"    console_handler = logging.StreamHandler(sys.stdout)\n",
"    console_handler.setLevel(logging.INFO)\n",
"    console_handler.setFormatter(formatter)\n",
"\n",
"    # Rotating file handler (log_level and above)\n",
"    file_handler = RotatingFileHandler(log_file, maxBytes=5*1024*1024, backupCount=2)\n",
"    file_handler.setLevel(log_level)\n",
"    file_handler.setFormatter(formatter)\n",
"\n",
"    # Detach the previous handlers and close them: clearing the list alone would\n",
"    # keep every earlier RotatingFileHandler's file open for the kernel's lifetime.\n",
"    for old_handler in list(logger.handlers):\n",
"        logger.removeHandler(old_handler)\n",
"        old_handler.close()\n",
"    logger.addHandler(console_handler)\n",
"    logger.addHandler(file_handler)\n",
"    logger.debug(\"...logging has been configured.\")\n",
"\n",
"# Initialize logging as early as possible\n",
"setup_logging()\n",
"\n",
"# =============================================================================\n",
"# Directories and File Paths\n",
"# =============================================================================\n",
"\n",
"# Every path below is resolved against the working directory at the time this cell runs.\n",
"current_dir = os.getcwd()\n",
"# main() looks for the model files in <rvc_models_dir>/<model_name>.\n",
"rvc_models_dir = os.path.join(current_dir, 'logs')\n",
"rvc_output_dir = os.path.join(current_dir, 'song_output')\n",
"download_dir = os.path.join(current_dir, \"downloads\")\n",
"uvr_output_dir = os.path.join(current_dir, \"output_uvr\")\n",
"\n",
"# File paths for separated stems (using uvr_output_dir)\n",
"vocals_path = os.path.join(uvr_output_dir, 'Vocals.wav')\n",
"instrumental_path = os.path.join(uvr_output_dir, 'Instrumental.wav')\n",
"lead_vocals_path = os.path.join(uvr_output_dir, 'Lead_Vocals.wav')\n",
"backing_vocals_path = os.path.join(uvr_output_dir, 'Backing_Vocals.wav')\n",
"\n",
"# File paths for RVC inference outputs\n",
"rvc_lead_output = os.path.join(rvc_output_dir, \"rvc_result_lead.wav\")\n",
"rvc_backing_output = os.path.join(rvc_output_dir, \"rvc_result_backing.wav\")\n",
"\n",
"# Path to the RVC script (ensure it exists in the current directory)\n",
"# Fail at cell start rather than later, when run_rvc() would try to launch it.\n",
"rvc_cli_file = os.path.join(current_dir, \"scrpt.py\")\n",
"if not os.path.exists(rvc_cli_file):\n",
"    logging.error(\"scrpt.py not found in the current directory: %s\", current_dir)\n",
"    raise FileNotFoundError(\"scrpt.py not found in the current directory.\")\n",
"\n",
"# =============================================================================\n",
"# Inference and Pipeline Parameters (Colab UI parameters below)\n",
"# =============================================================================\n",
"\n",
"# Colab form fields. model_name must match a sub-folder of rvc_models_dir,\n",
"# because main() looks for the .pth/.index files there.\n",
"# Integer sliders use step:1 (a step of 0 is not a usable increment).\n",
"model_name = \"Sonic\" # @param {type:\"string\"}\n",
"youtube_url = \"https://youtu.be/eCkWlRL3_N0?si=y6xHAs1m8fYVLTUV\" # @param {type:\"string\"}\n",
"export_format = \"WAV\" # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A']\n",
"f0_method = \"hybrid[rmvpe+fcpe]\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\", \"fcpe\", \"hybrid[rmvpe+fcpe]\"]\n",
"f0_up_key = 0 # @param {type:\"slider\", min:-24, max:24, step:1}\n",
"filter_radius = 3 # @param {type:\"slider\", min:0, max:10, step:1}\n",
"rms_mix_rate = 0.8 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
"protect = 0.5 # @param {type:\"slider\", min:0.0, max:0.5, step:0.1}\n",
"index_rate = 0.6 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
"hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:1}\n",
"clean_strength = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
"split_audio = False # @param {type:\"boolean\"}\n",
"clean_audio = False # @param {type:\"boolean\"}\n",
"f0_autotune = False # @param {type:\"boolean\"}\n",
"backing_vocal_infer = False # @param {type:\"boolean\"}\n",
"embedder_model = \"contentvec\" # @param [\"contentvec\", \"chinese-hubert-base\", \"japanese-hubert-base\", \"korean-hubert-base\", \"custom\"]\n",
"embedder_model_custom = \"\" # @param {type:\"string\"}\n",
"# Base name (without extension) of the final mixed file written by main().\n",
"output_filename = f\"aicover_{model_name}_opt\"\n",
"logging.info(\"This code was written by [NeoDev](https://github.com/TheNeodev). Please credit if you copy or modify the code.\")\n",
"\n",
"# =============================================================================\n",
"# Function Definitions\n",
"# =============================================================================\n",
"\n",
"def download_youtube_audio(url, download_dir):\n",
"    \"\"\"\n",
"    Download the audio of a YouTube URL as WAV via yt-dlp.\n",
"\n",
"    Args:\n",
"        url: Video or playlist URL handed to yt-dlp.\n",
"        download_dir: Directory the files are written to (created if missing).\n",
"\n",
"    Returns:\n",
"        A single path (str) for one video, or a list of paths for a playlist.\n",
"\n",
"    The paths are derived with ydl.prepare_filename() instead of the raw title:\n",
"    yt-dlp sanitizes the title (e.g. '/' characters) when it expands the output\n",
"    template, so \"<title>.wav\" is not guaranteed to be the name on disk.\n",
"    \"\"\"\n",
"    logging.debug(\"Starting YouTube audio download. URL: %s\", url)\n",
"    os.makedirs(download_dir, exist_ok=True)\n",
"    outtmpl = os.path.join(download_dir, \"%(title)s.%(ext)s\")\n",
"    ydl_opts = {\n",
"        \"format\": \"bestaudio/best\",\n",
"        \"outtmpl\": outtmpl,\n",
"        \"postprocessors\": [{\n",
"            \"key\": \"FFmpegExtractAudio\",\n",
"            \"preferredcodec\": \"wav\",\n",
"            \"preferredquality\": \"192\"\n",
"        }],\n",
"    }\n",
"\n",
"    def wav_path(ydl, entry):\n",
"        # The audio-extraction post-processor keeps the stem and swaps the extension to .wav.\n",
"        return os.path.splitext(ydl.prepare_filename(entry))[0] + \".wav\"\n",
"\n",
"    with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
"        info_dict = ydl.extract_info(url, download=True)\n",
"        if \"entries\" in info_dict:  # Playlist support\n",
"            downloaded_files = [wav_path(ydl, entry) for entry in info_dict[\"entries\"] if entry]\n",
"        else:\n",
"            downloaded_files = wav_path(ydl, info_dict)\n",
"    logging.debug(\"Downloaded audio file(s): %s\", downloaded_files)\n",
"    return downloaded_files\n",
"\n",
"def _match_stems(files, first_tag, second_tag):\n",
"    \"\"\"\n",
"    Pick two stems out of ``files`` by the tag embedded in their file names.\n",
"\n",
"    Returns (file carrying first_tag, file carrying second_tag). Unless exactly\n",
"    one distinct file matches each tag, it falls back to positional order\n",
"    (files[0], files[1]), i.e. the assignment this notebook used before.\n",
"    \"\"\"\n",
"    first = [f for f in files if first_tag in os.path.basename(f)]\n",
"    second = [f for f in files if second_tag in os.path.basename(f)]\n",
"    if len(first) == 1 and len(second) == 1 and first[0] != second[0]:\n",
"        return first[0], second[0]\n",
"    return files[0], files[1]\n",
"\n",
"\n",
"def separator_uvr(input_audio, output_dir):\n",
"    \"\"\"\n",
"    Separate the input audio into instrumental and vocal stems,\n",
"    then further separate vocals into lead and backing vocals.\n",
"\n",
"    Args:\n",
"        input_audio: Path of the audio file to split.\n",
"        output_dir: Directory the separator writes into (created if missing).\n",
"\n",
"    Returns:\n",
"        (lead_vocals_path, backing_vocals_path). The stems are moved to the\n",
"        module-level instrumental_path / vocals_path / lead_vocals_path /\n",
"        backing_vocals_path locations.\n",
"\n",
"    Raises:\n",
"        RuntimeError: a separation pass produced fewer than two files.\n",
"    \"\"\"\n",
"    logging.debug(\"Starting UVR separation for file: %s\", input_audio)\n",
"    os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"    # NOTE(review): audio-separator is expected to embed the stem in each output\n",
"    # name as \"_(Instrumental)_\" / \"_(Vocals)_\" - confirm for the installed\n",
"    # version. When the tags are absent, _match_stems keeps the positional order.\n",
"    instrumental_tag, vocals_tag = \"_(Instrumental)_\", \"_(Vocals)_\"\n",
"\n",
"    # First separation: get instrumental and vocals\n",
"    uvr_separator = Separator(output_dir=output_dir)\n",
"    logging.debug(\"Loading first UVR model for instrumental/vocals separation.\")\n",
"    uvr_separator.load_model('model_bs_roformer_ep_317_sdr_12.9755.ckpt')\n",
"    separated_files = uvr_separator.separate(input_audio)\n",
"    if len(separated_files) < 2:\n",
"        error_msg = \"UVR separation did not produce expected files for instrumental/vocals.\"\n",
"        logging.error(error_msg)\n",
"        raise RuntimeError(error_msg)\n",
"\n",
"    # Move the stems to our designated paths; os.replace overwrites leftovers\n",
"    # from an earlier run on every platform.\n",
"    instrumental_src, vocals_src = _match_stems(separated_files, instrumental_tag, vocals_tag)\n",
"    os.replace(os.path.join(output_dir, instrumental_src), instrumental_path)\n",
"    os.replace(os.path.join(output_dir, vocals_src), vocals_path)\n",
"    logging.debug(\"Separated instrumental saved to: %s\", instrumental_path)\n",
"    logging.debug(\"Separated vocals saved to: %s\", vocals_path)\n",
"\n",
"    # Second separation: split vocals into lead and backing\n",
"    logging.debug(\"Loading second UVR model for vocal splitting.\")\n",
"    uvr_separator.load_model('mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt')\n",
"    separated_vocals = uvr_separator.separate(vocals_path)\n",
"    if len(separated_vocals) < 2:\n",
"        error_msg = \"UVR separation did not produce expected files for vocal split.\"\n",
"        logging.error(error_msg)\n",
"        raise RuntimeError(error_msg)\n",
"\n",
"    # With a vocals-only input, the karaoke model's \"instrumental\" stem is what\n",
"    # remains once the lead vocal is removed, i.e. the backing vocals.\n",
"    backing_src, lead_src = _match_stems(separated_vocals, instrumental_tag, vocals_tag)\n",
"    os.replace(os.path.join(output_dir, backing_src), backing_vocals_path)\n",
"    os.replace(os.path.join(output_dir, lead_src), lead_vocals_path)\n",
"    logging.debug(\"Separated backing vocals saved to: %s\", backing_vocals_path)\n",
"    logging.debug(\"Separated lead vocals saved to: %s\", lead_vocals_path)\n",
"\n",
"    return lead_vocals_path, backing_vocals_path\n",
"\n",
"def run_rvc(f0_up_key, filter_radius, rms_mix_rate, index_rate, hop_length, protect,\n",
"            f0_method, input_path, output_path, pth_file, index_file, split_audio,\n",
"            clean_audio, clean_strength, export_format, f0_autotune,\n",
"            embedder_model, embedder_model_custom):\n",
"    \"\"\"\n",
"    Run the `infer` sub-command of the RVC script (rvc_cli_file) in a subprocess.\n",
"\n",
"    Every argument is forwarded as a command-line option; non-string values are\n",
"    converted with str(). stdout/stderr of the child are captured and logged.\n",
"\n",
"    Raises:\n",
"        subprocess.CalledProcessError: the script exited with a non-zero status.\n",
"    \"\"\"\n",
"    logging.debug(\"Preparing RVC inference command for input file: %s\", input_path)\n",
"\n",
"    # (flag, value) pairs in the order they appear on the command line.\n",
"    cli_options = (\n",
"        (\"--pitch\", str(f0_up_key)),\n",
"        (\"--filter_radius\", str(filter_radius)),\n",
"        (\"--volume_envelope\", str(rms_mix_rate)),\n",
"        (\"--index_rate\", str(index_rate)),\n",
"        (\"--hop_length\", str(hop_length)),\n",
"        (\"--protect\", str(protect)),\n",
"        (\"--f0_method\", f0_method),\n",
"        (\"--f0_autotune\", str(f0_autotune)),\n",
"        (\"--input_path\", input_path),\n",
"        (\"--output_path\", output_path),\n",
"        (\"--pth_path\", pth_file),\n",
"        (\"--index_path\", index_file),\n",
"        (\"--split_audio\", str(split_audio)),\n",
"        (\"--clean_audio\", str(clean_audio)),\n",
"        (\"--clean_strength\", str(clean_strength)),\n",
"        (\"--export_format\", export_format),\n",
"        (\"--embedder_model\", embedder_model),\n",
"        (\"--embedder_model_custom\", embedder_model_custom),\n",
"    )\n",
"    command = [sys.executable, rvc_cli_file, \"infer\"]\n",
"    for flag, value in cli_options:\n",
"        command.extend((flag, value))\n",
"\n",
"    logging.info(\"Running RVC inference. Command: %s\", \" \".join(command))\n",
"    try:\n",
"        result = subprocess.run(command, check=True, capture_output=True, text=True)\n",
"    except subprocess.CalledProcessError as e:\n",
"        logging.error(\"RVC inference failed for input: %s\", input_path)\n",
"        logging.error(\"Error output: %s\", e.stderr)\n",
"        raise e\n",
"\n",
"    logging.debug(\"RVC inference stdout: %s\", result.stdout)\n",
"    if result.stderr:\n",
"        logging.debug(\"RVC inference stderr: %s\", result.stderr)\n",
"    logging.info(\"RVC inference completed for input: %s\", input_path)\n",
"\n",
"def load_audio(file_path):\n",
"    \"\"\"Return the file as a pydub AudioSegment, or None (after a warning) when the path is empty or missing.\"\"\"\n",
"    path_is_usable = bool(file_path) and os.path.exists(file_path)\n",
"    if not path_is_usable:\n",
"        logging.warning(\"Audio file not found: %s\", file_path)\n",
"        return None\n",
"    logging.debug(\"Loading audio file: %s\", file_path)\n",
"    return AudioSegment.from_file(file_path)\n",
"\n",
"# =============================================================================\n",
"# Main Execution Function\n",
"# =============================================================================\n",
"\n",
"def main():\n",
"    \"\"\"\n",
"    Run the full cover pipeline using the form parameters of this cell.\n",
"\n",
"    Steps: locate the model's .pth/.index files under rvc_models_dir/model_name,\n",
"    download the audio of youtube_url, split it into instrumental / lead /\n",
"    backing stems, run RVC inference on the lead vocals (and on the backing\n",
"    vocals when backing_vocal_infer is set), overlay the vocals on the\n",
"    instrumental and export the mix as \"<output_filename>.<export_format>\".\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: the model folder, its .pth/.index files, or the\n",
"            downloaded audio file is missing.\n",
"        ValueError: the instrumental stem could not be loaded for mixing.\n",
"    \"\"\"\n",
"    logging.info(\"Starting Advanced-RVC pipeline.\")\n",
"\n",
"    # Check model folder and required model files\n",
"    model_folder = os.path.join(rvc_models_dir, model_name)\n",
"    if not os.path.exists(model_folder):\n",
"        error_msg = f\"Model directory not found: {model_folder}\"\n",
"        logging.error(error_msg)\n",
"        raise FileNotFoundError(error_msg)\n",
"    files_in_folder = os.listdir(model_folder)\n",
"    pth_filename = next((f for f in files_in_folder if f.endswith(\".pth\")), None)\n",
"    index_filename = next((f for f in files_in_folder if f.endswith(\".index\")), None)\n",
"    if not pth_filename or not index_filename:\n",
"        error_msg = \"Required model files (.pth or .index) were not found in the model folder.\"\n",
"        logging.error(error_msg)\n",
"        raise FileNotFoundError(error_msg)\n",
"    pth_file = os.path.join(model_folder, pth_filename)\n",
"    index_file = os.path.join(model_folder, index_filename)\n",
"    logging.debug(\"Model files located. PTH: %s, Index: %s\", pth_file, index_file)\n",
"\n",
"    # Download audio from YouTube\n",
"    logging.info(\"Downloading audio from YouTube...\")\n",
"    downloaded_audio = download_youtube_audio(youtube_url, download_dir)\n",
"    if isinstance(downloaded_audio, list):\n",
"        # A playlist URL yields a list; only its first track is processed.\n",
"        if not downloaded_audio:\n",
"            error_msg = \"No audio was downloaded from the given URL.\"\n",
"            logging.error(error_msg)\n",
"            raise FileNotFoundError(error_msg)\n",
"        input_audio = downloaded_audio[0]\n",
"    else:\n",
"        input_audio = downloaded_audio\n",
"    if not os.path.exists(input_audio):\n",
"        error_msg = f\"Downloaded audio file not found: {input_audio}\"\n",
"        logging.error(error_msg)\n",
"        raise FileNotFoundError(error_msg)\n",
"    logging.info(\"Audio downloaded successfully: %s\", input_audio)\n",
"\n",
"    # Run UVR separation\n",
"    logging.info(\"Running UVR separation...\")\n",
"    lead_vocals_file, backing_vocals_file = separator_uvr(input_audio, uvr_output_dir)\n",
"    logging.info(\"UVR separation completed. Lead vocals: %s, Backing vocals: %s\", lead_vocals_file, backing_vocals_file)\n",
"\n",
"    # Ensure the output directory for RVC exists\n",
"    os.makedirs(rvc_output_dir, exist_ok=True)\n",
"\n",
"    # Settings shared by both inference passes; only input/output paths differ.\n",
"    rvc_settings = dict(\n",
"        f0_up_key=f0_up_key, filter_radius=filter_radius, rms_mix_rate=rms_mix_rate,\n",
"        index_rate=index_rate, hop_length=hop_length, protect=protect,\n",
"        f0_method=f0_method, pth_file=pth_file, index_file=index_file,\n",
"        split_audio=split_audio, clean_audio=clean_audio, clean_strength=clean_strength,\n",
"        export_format=export_format, f0_autotune=f0_autotune,\n",
"        embedder_model=embedder_model, embedder_model_custom=embedder_model_custom,\n",
"    )\n",
"\n",
"    # Run RVC inference for lead vocals\n",
"    logging.info(\"Running RVC inference for lead vocals...\")\n",
"    run_rvc(input_path=lead_vocals_file, output_path=rvc_lead_output, **rvc_settings)\n",
"\n",
"    # Optionally run RVC inference for backing vocals\n",
"    if backing_vocal_infer:\n",
"        logging.info(\"Running RVC inference for backing vocals...\")\n",
"        run_rvc(input_path=backing_vocals_file, output_path=rvc_backing_output, **rvc_settings)\n",
"\n",
"    logging.info(\"RVC pipeline complete.\")\n",
"\n",
"    # Load the separated/inferred tracks for final mix\n",
"    logging.info(\"Loading audio tracks for final mix.\")\n",
"    lead_vocals_audio = load_audio(rvc_lead_output)\n",
"    instrumental_audio = load_audio(instrumental_path)\n",
"    # If backing inference was run, load its result; otherwise use separated backing vocals.\n",
"    backing_vocals_audio = load_audio(rvc_backing_output if backing_vocal_infer else backing_vocals_file)\n",
"\n",
"    if not instrumental_audio:\n",
"        error_msg = \"Instrumental track is required for mixing!\"\n",
"        logging.error(error_msg)\n",
"        raise ValueError(error_msg)\n",
"\n",
"    # Mix the audio tracks: overlay lead vocals and backing vocals onto the instrumental\n",
"    final_mix = instrumental_audio\n",
"    if lead_vocals_audio:\n",
"        logging.debug(\"Overlaying lead vocals onto instrumental.\")\n",
"        final_mix = final_mix.overlay(lead_vocals_audio)\n",
"    if backing_vocals_audio:\n",
"        logging.debug(\"Overlaying backing vocals onto instrumental.\")\n",
"        final_mix = final_mix.overlay(backing_vocals_audio)\n",
"\n",
"    # Export final mix to file. pydub hands `format` to ffmpeg's -f option, and\n",
"    # ffmpeg has no muxer called \"m4a\"; its \"ipod\" muxer is the one that writes\n",
"    # .m4a files, so only the ffmpeg format name (not the extension) is mapped.\n",
"    extension = export_format.lower()\n",
"    ffmpeg_format = {\"m4a\": \"ipod\"}.get(extension, extension)\n",
"    output_file = f\"{output_filename}.{extension}\"\n",
"    final_mix.export(output_file, format=ffmpeg_format)\n",
"    logging.info(\"✅ Mixed file saved as: %s\", output_file)\n",
"    print(f\"✅ Mixed file saved as: {output_file}\")\n",
"\n",
"# =============================================================================\n",
"# Run the Pipeline if Executed as a Script\n",
"# =============================================================================\n",
"\n",
"# Log the full traceback (it also lands in the log file) and re-raise so the\n",
"# cell is still reported as failed.\n",
"if __name__ == \"__main__\":\n",
"    try:\n",
"        main()\n",
"    except Exception as e:\n",
"        logging.exception(\"An error occurred during execution: %s\", e)\n",
"        raise"
],
"metadata": {
"cellView": "form",
"id": "9-KMNp7tFrEk"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title play ur audio output\n",
"# Relies on output_filename, export_format and AudioSegment from the\n",
"# \"run Advanced-RVC\" cell above, so that cell has to be run first.\n",
"\n",
"output_file = f\"{output_filename}.{export_format.lower()}\"\n",
"\n",
"# Last expression of the cell: the loaded segment is what the notebook displays.\n",
"AudioSegment.from_file(output_file)"
],
"metadata": {
"cellView": "form",
"id": "NvxvDUUOrYd-"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4,
"colab": {
"provenance": [],
"gpuType": "T4",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"nbformat": 4,
"nbformat_minor": 0
}