<a href="https://colab.research.google.com/github/ArkanDash/Advanced-RVC-Inference/blob/master/Advanced-RVC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1><div align="center"> Advanced RVC Inference:

<big> for quick and effortless model downloads

---

[Support](https://discord.gg/hvmsukmBHE) — [GitHub](https://github.com/ArkanDash/Advanced-RVC-Inference.git)

In [None]:
#@title Check GPU
# Shell out to nvidia-smi so the cell output shows which GPU (if any) the runtime exposes.
!nvidia-smi

In [None]:
# @title Installation


from IPython.display import clear_output



url = "https://github.com/ArkanDash/Advanced-RVC-Inference.git"

!git clone $url /content/program_infer
clear_output()

%cd /content/program_infer


!pip install -r requirements.txt
!pip uninstall torch torchvision torchaudio -y
!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cu121
clear_output()
print("Finished installing requirements!")

In [None]:
#@title Run WebUI
# Start the project's app.py with the --share flag.
# The script path is relative, so this cell relies on the working directory
# set by the Installation cell (%cd /content/program_infer).


iyalah = "app.py"  # entry-point script passed to the interpreter below
print("running WebUI")
!python $iyalah --share

## Run NoUI
<div align="center">

• Created by [NeoDev](https://github.com/TheNeodev) •

In [None]:
# @title Download model
# @markdown Hugging Face or Google Drive
model_link = "https://huggingface.co/Bredvige/Sonic2/resolve/main/Sonic.zip"  # @param {type:"string"}

# Pass the link to the `download` sub-command of scrpt.py. The script path is
# relative to the current working directory, and the link is interpolated into
# the shell line inside double quotes.
!python scrpt.py download --model_link "{model_link}"

In [None]:

#@title run Advanced-RVC

import os
import sys
import yt_dlp
import subprocess
import logging
import json
from logging.handlers import RotatingFileHandler
from contextlib import suppress
import gradio as gr
import librosa
import numpy as np
import soundfile as sf
from pydub import AudioSegment
# Import the UVR separator. Ensure the module is available.
try:
    from audio_separator.separator import Separator
except ImportError:
    # Replace the bare import failure with a message that tells the user what to install.
    raise ImportError("Make sure the 'audio_separator' module is installed or in your working directory.")

from rvc.lib.tools.prerequisites_download import prerequisites_download_pipeline

# Run the project's prerequisite download step (models=True, exe=True) before
# the rest of the cell executes.
# NOTE(review): what exactly gets downloaded is defined in
# rvc.lib.tools.prerequisites_download, which is not visible here.
if __name__ == "__main__":
    prerequisites_download_pipeline(models=True, exe=True)


# =============================================================================
# Logging Setup
# =============================================================================

def setup_logging(log_level=logging.DEBUG, log_file="kuro_rvc.log"):
    """
    Set up advanced logging with both console and rotating file handlers.

    The root logger is reconfigured from scratch: every handler currently
    attached to it is removed *and closed*, then a console handler and a
    rotating file handler are installed. Closing matters in a notebook, where
    this cell is typically run many times; without it each run leaves another
    open handle on the log file.

    Args:
        log_level: Level applied to the root logger and to the file handler.
        log_file: Path of the log file (rotated at 5 MiB, 2 backups kept).
    """
    logger = logging.getLogger()
    logger.setLevel(log_level)

    # Formatter for both handlers
    formatter = logging.Formatter(
        fmt="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )

    # Console handler (INFO level and above)
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    # Rotating file handler (log_level and above)
    file_handler = RotatingFileHandler(log_file, maxBytes=5*1024*1024, backupCount=2)
    file_handler.setLevel(log_level)
    file_handler.setFormatter(formatter)

    # Detach and close the previous handlers. The new handlers are built first
    # so that a failure above leaves the existing configuration untouched.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        # A handler that fails to close must not prevent reconfiguration.
        with suppress(Exception):
            stale_handler.close()

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
    logger.debug("...logging has been configured.")

# Configure logging first so that everything below is captured.
setup_logging()

# =============================================================================
# Directories and File Paths
# =============================================================================

current_dir = os.getcwd()

# Working directories, all rooted at the current working directory.
rvc_models_dir, rvc_output_dir, download_dir, uvr_output_dir = (
    os.path.join(current_dir, folder)
    for folder in ('logs', 'song_output', "downloads", "output_uvr")
)

# Separated stems, all stored inside uvr_output_dir.
vocals_path, instrumental_path, lead_vocals_path, backing_vocals_path = (
    os.path.join(uvr_output_dir, stem_file)
    for stem_file in ('Vocals.wav', 'Instrumental.wav', 'Lead_Vocals.wav', 'Backing_Vocals.wav')
)

# Results of the RVC inference runs, stored inside rvc_output_dir.
rvc_lead_output, rvc_backing_output = (
    os.path.join(rvc_output_dir, result_file)
    for result_file in ("rvc_result_lead.wav", "rvc_result_backing.wav")
)

# The RVC script is invoked as a subprocess later on; fail fast when it is missing.
rvc_cli_file = os.path.join(current_dir, "scrpt.py")
if not os.path.exists(rvc_cli_file):
    logging.error("scrpt.py not found in the current directory: %s", current_dir)
    raise FileNotFoundError("scrpt.py not found in the current directory.")

# =============================================================================
# Inference and Pipeline Parameters (Colab UI parameters below)
# =============================================================================

# Each assignment carries a Colab `# @param` annotation; the values are read as
# module-level globals by main() further down.
# NOTE(review): the f0_up_key, filter_radius and hop_length sliders declare
# `step:0` — confirm the form renders the intended integer step.

# Name of the sub-folder of rvc_models_dir that main() searches for .pth/.index files.
model_name = "Sonic"  # @param {type:"string"}
# URL handed to download_youtube_audio().
youtube_url = "https://youtu.be/eCkWlRL3_N0?si=y6xHAs1m8fYVLTUV"  # @param {type:"string"}
# Used both for the RVC CLI (--export_format) and for the final mix export.
export_format = "WAV"  # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A']
f0_method = "hybrid[rmvpe+fcpe]"  # @param ["crepe", "crepe-tiny", "rmvpe", "fcpe", "hybrid[rmvpe+fcpe]"]
# Forwarded to the CLI as --pitch.
f0_up_key = 0  # @param {type:"slider", min:-24, max:24, step:0}
filter_radius = 3  # @param {type:"slider", min:0, max:10, step:0}
# Forwarded to the CLI as --volume_envelope.
rms_mix_rate = 0.8  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
protect = 0.5  # @param {type:"slider", min:0.0, max:0.5, step:0.1}
index_rate = 0.6  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
hop_length = 128  # @param {type:"slider", min:1, max:512, step:0}
clean_strength = 0.7  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
split_audio = False  # @param {type:"boolean"}
clean_audio = False  # @param {type:"boolean"}
f0_autotune = False  # @param {type:"boolean"}
# When True, main() also runs RVC inference on the backing vocals.
backing_vocal_infer = False  # @param {type:"boolean"}
embedder_model = "contentvec"  # @param ["contentvec", "chinese-hubert-base", "japanese-hubert-base", "korean-hubert-base", "custom"]
embedder_model_custom = ""  # @param {type:"string"}
# Base name (without extension) of the final mix; main() appends the export format.
output_filename = f"aicover_{model_name}_opt"
logging.info("This code was written by [NeoDev](https://github.com/TheNeodev). Please credit if you copy or modify the code.")

# =============================================================================
# Function Definitions
# =============================================================================

def download_youtube_audio(url, download_dir):
    """
    Download audio from a YouTube URL and return the path(s) to the downloaded WAV file(s).

    The audio is fetched with yt-dlp and converted to WAV by the
    FFmpegExtractAudio post-processor.

    Args:
        url: Video or playlist URL.
        download_dir: Directory the files are written to (created if missing).

    Returns:
        A single path (str) for a single video, or a list of paths when the
        URL resolves to a playlist.
    """
    logging.debug("Starting YouTube audio download. URL: %s", url)
    os.makedirs(download_dir, exist_ok=True)
    outtmpl = os.path.join(download_dir, "%(title)s.%(ext)s")
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": outtmpl,
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "192"
        }],
    }

    def _wav_path(ydl, info):
        # Ask yt-dlp for the name it generated from the output template instead
        # of rebuilding it from the raw title: yt-dlp sanitises characters such
        # as "/" or ":" in titles, so a hand-built "<title>.wav" may not exist.
        # The post-processor only swaps the extension, so do the same here.
        return os.path.splitext(ydl.prepare_filename(info))[0] + ".wav"

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        if "entries" in info_dict:  # Playlist support
            # Entries can be None for items that could not be processed; skip those.
            downloaded_files = [_wav_path(ydl, entry) for entry in info_dict["entries"] if entry]
        else:
            downloaded_files = _wav_path(ydl, info_dict)
    logging.debug("Downloaded audio file(s): %s", downloaded_files)
    return downloaded_files

def separator_uvr(input_audio, output_dir):
    """
    Run two UVR separation passes on ``input_audio``.

    Pass one splits the input into two stems, which are renamed to the
    module-level ``instrumental_path`` and ``vocals_path``. Pass two splits
    the vocals stem again; its outputs are renamed to ``backing_vocals_path``
    and ``lead_vocals_path``.

    Args:
        input_audio: Path of the audio file to separate.
        output_dir: Directory handed to the separator (created if missing).

    Returns:
        Tuple ``(lead_vocals_path, backing_vocals_path)``.

    Raises:
        RuntimeError: If either pass yields fewer than two files.
    """
    logging.debug("Starting UVR separation for file: %s", input_audio)
    os.makedirs(output_dir, exist_ok=True)

    def _ensure_two_stems(stems, error_msg):
        # Both passes need at least two output files to continue.
        if len(stems) < 2:
            logging.error(error_msg)
            raise RuntimeError(error_msg)

    def _move_stem(produced_name, destination):
        # The separator reports file names; they are resolved against output_dir.
        os.rename(os.path.join(output_dir, produced_name), destination)

    # Pass 1: instrumental / vocals
    uvr_separator = Separator(output_dir=output_dir)
    logging.debug("Loading first UVR model for instrumental/vocals separation.")
    uvr_separator.load_model('model_bs_roformer_ep_317_sdr_12.9755.ckpt')
    first_pass = uvr_separator.separate(input_audio)
    _ensure_two_stems(
        first_pass,
        "UVR separation did not produce expected files for instrumental/vocals.",
    )

    # Index 0 is treated as the instrumental and index 1 as the vocals.
    _move_stem(first_pass[0], instrumental_path)
    _move_stem(first_pass[1], vocals_path)
    logging.debug("Separated instrumental saved to: %s", instrumental_path)
    logging.debug("Separated vocals saved to: %s", vocals_path)

    # Pass 2: lead / backing vocals, using the vocals stem from pass 1
    logging.debug("Loading second UVR model for vocal splitting.")
    uvr_separator.load_model('mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt')
    second_pass = uvr_separator.separate(vocals_path)
    _ensure_two_stems(
        second_pass,
        "UVR separation did not produce expected files for vocal split.",
    )

    # Index 0 is treated as the backing vocals and index 1 as the lead vocals.
    _move_stem(second_pass[0], backing_vocals_path)
    _move_stem(second_pass[1], lead_vocals_path)
    logging.debug("Separated backing vocals saved to: %s", backing_vocals_path)
    logging.debug("Separated lead vocals saved to: %s", lead_vocals_path)

    return lead_vocals_path, backing_vocals_path

def run_rvc(f0_up_key, filter_radius, rms_mix_rate, index_rate, hop_length, protect,
            f0_method, input_path, output_path, pth_file, index_file, split_audio,
            clean_audio, clean_strength, export_format, f0_autotune,
            embedder_model, embedder_model_custom):
    """
    Run RVC inference by calling the ``infer`` sub-command of the script at
    ``rvc_cli_file`` in a subprocess.

    Numeric and boolean settings are stringified before being placed on the
    command line; path and name arguments are passed through as given. The
    child's stdout/stderr are captured and written to the debug log.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (its stderr is logged first).
    """
    logging.debug("Preparing RVC inference command for input file: %s", input_path)

    # (flag, value) pairs in the order they appear on the command line.
    cli_options = (
        ("--pitch", str(f0_up_key)),
        ("--filter_radius", str(filter_radius)),
        ("--volume_envelope", str(rms_mix_rate)),
        ("--index_rate", str(index_rate)),
        ("--hop_length", str(hop_length)),
        ("--protect", str(protect)),
        ("--f0_method", f0_method),
        ("--f0_autotune", str(f0_autotune)),
        ("--input_path", input_path),
        ("--output_path", output_path),
        ("--pth_path", pth_file),
        ("--index_path", index_file),
        ("--split_audio", str(split_audio)),
        ("--clean_audio", str(clean_audio)),
        ("--clean_strength", str(clean_strength)),
        ("--export_format", export_format),
        ("--embedder_model", embedder_model),
        ("--embedder_model_custom", embedder_model_custom),
    )
    command = [sys.executable, rvc_cli_file, "infer"]
    for flag, value in cli_options:
        command.extend((flag, value))

    logging.info("Running RVC inference. Command: %s", " ".join(command))
    try:
        completed = subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        logging.error("RVC inference failed for input: %s", input_path)
        logging.error("Error output: %s", e.stderr)
        raise e
    else:
        logging.debug("RVC inference stdout: %s", completed.stdout)
        if completed.stderr:
            logging.debug("RVC inference stderr: %s", completed.stderr)
        logging.info("RVC inference completed for input: %s", input_path)

def load_audio(file_path):
    """Return the audio at ``file_path`` loaded via pydub, or None (with a warning) if the path is empty or missing."""
    if not file_path or not os.path.exists(file_path):
        logging.warning("Audio file not found: %s", file_path)
        return None

    logging.debug("Loading audio file: %s", file_path)
    return AudioSegment.from_file(file_path)

# =============================================================================
# Main Execution Function
# =============================================================================

def main():
    """
    Run the whole cover pipeline using the module-level parameters.

    Steps: locate the model's .pth/.index files, download the source audio,
    separate it into stems, run RVC inference on the lead vocals (and on the
    backing vocals when ``backing_vocal_infer`` is set), then overlay the
    vocal tracks on the instrumental and export the mix.

    Raises:
        FileNotFoundError: If the model folder, the model files or the
            downloaded audio cannot be found.
        ValueError: If the instrumental track is missing at mixing time.
    """
    logging.info("Starting Advanced-RVC pipeline.")

    # Check model folder and required model files
    model_folder = os.path.join(rvc_models_dir, model_name)
    if not os.path.exists(model_folder):
        error_msg = f"Model directory not found: {model_folder}"
        logging.error(error_msg)
        raise FileNotFoundError(error_msg)
    files_in_folder = os.listdir(model_folder)
    # The first matching file of each kind is used.
    pth_filename = next((f for f in files_in_folder if f.endswith(".pth")), None)
    index_filename = next((f for f in files_in_folder if f.endswith(".index")), None)
    if not pth_filename or not index_filename:
        error_msg = "Required model files (.pth or .index) were not found in the model folder."
        logging.error(error_msg)
        raise FileNotFoundError(error_msg)
    pth_file = os.path.join(model_folder, pth_filename)
    index_file = os.path.join(model_folder, index_filename)
    logging.debug("Model files located. PTH: %s, Index: %s", pth_file, index_file)

    # Download audio from YouTube
    logging.info("Downloading audio from YouTube...")
    downloaded_audio = download_youtube_audio(youtube_url, download_dir)
    # A playlist yields a list; only its first item is processed.
    input_audio = downloaded_audio[0] if isinstance(downloaded_audio, list) else downloaded_audio
    if not os.path.exists(input_audio):
        error_msg = f"Downloaded audio file not found: {input_audio}"
        logging.error(error_msg)
        raise FileNotFoundError(error_msg)
    logging.info("Audio downloaded successfully: %s", input_audio)

    # Run UVR separation
    logging.info("Running UVR separation...")
    lead_vocals_file, backing_vocals_file = separator_uvr(input_audio, uvr_output_dir)
    logging.info("UVR separation completed. Lead vocals: %s, Backing vocals: %s", lead_vocals_file, backing_vocals_file)

    # Ensure the output directory for RVC exists
    os.makedirs(rvc_output_dir, exist_ok=True)

    # Run RVC inference for lead vocals
    # NOTE(review): rvc_lead_output / rvc_backing_output always end in ".wav",
    # even when export_format is not WAV — confirm the CLI writes to exactly
    # the path it is given.
    logging.info("Running RVC inference for lead vocals...")
    run_rvc(f0_up_key, filter_radius, rms_mix_rate, index_rate, hop_length, protect,
            f0_method, lead_vocals_path, rvc_lead_output, pth_file, index_file,
            split_audio, clean_audio, clean_strength, export_format, f0_autotune,
            embedder_model, embedder_model_custom)

    # Optionally run RVC inference for backing vocals
    if backing_vocal_infer:
        logging.info("Running RVC inference for backing vocals...")
        run_rvc(f0_up_key, filter_radius, rms_mix_rate, index_rate, hop_length, protect,
                f0_method, backing_vocals_path, rvc_backing_output, pth_file, index_file,
                split_audio, clean_audio, clean_strength, export_format, f0_autotune,
                embedder_model, embedder_model_custom)

    logging.info("RVC pipeline complete.")

    # Load the separated/inferred tracks for final mix
    logging.info("Loading audio tracks for final mix.")
    lead_vocals_audio = load_audio(rvc_lead_output)
    instrumental_audio = load_audio(instrumental_path)
    # If backing inference was run, load its result; otherwise use separated backing vocals.
    backing_source = rvc_backing_output if backing_vocal_infer else backing_vocals_path
    backing_vocals_audio = load_audio(backing_source)

    # Compare against None explicitly: AudioSegment defines __len__, so a
    # zero-length (but present) track would otherwise be treated as missing.
    if instrumental_audio is None:
        error_msg = "Instrumental track is required for mixing!"
        logging.error(error_msg)
        raise ValueError(error_msg)

    # Mix the audio tracks: overlay lead vocals and backing vocals onto the instrumental
    final_mix = instrumental_audio
    if lead_vocals_audio is not None:
        logging.debug("Overlaying lead vocals onto instrumental.")
        final_mix = final_mix.overlay(lead_vocals_audio)
    if backing_vocals_audio is not None:
        logging.debug("Overlaying backing vocals onto instrumental.")
        final_mix = final_mix.overlay(backing_vocals_audio)

    # Export final mix to file
    extension = export_format.lower()
    output_file = f"{output_filename}.{extension}"
    # pydub passes `format` to ffmpeg as the container name. ffmpeg has no
    # "m4a" muxer; .m4a files are written with the "ipod" muxer. The other
    # offered formats share their name with the ffmpeg muxer.
    ffmpeg_format = {"m4a": "ipod"}.get(extension, extension)
    final_mix.export(output_file, format=ffmpeg_format)
    logging.info("✅ Mixed file saved as: %s", output_file)
    print(f"✅ Mixed file saved as: {output_file}")

# =============================================================================
# Run the Pipeline if Executed as a Script
# =============================================================================

# Entry point: run the pipeline when this code is executed as the main module.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # logging.exception records the traceback through the configured
        # handlers; the bare `raise` then lets the failure propagate.
        logging.exception("An error occurred during execution: %s", e)
        raise

In [None]:
#@title play ur audio output

# Rebuild the mixed file's name the same way main() does. This cell relies on
# output_filename, export_format and AudioSegment from the previous cell.
output_file = f"{output_filename}.{export_format.lower()}"

# The loaded segment is the cell's last expression, so the notebook displays it.
# NOTE(review): an inline audio player depends on pydub's rich notebook repr — confirm.
AudioSegment.from_file(output_file)