LAP-DEV committed on
Commit 3aee269 · verified · 1 Parent(s): a345ba7

Upload music_separator.py

Files changed (1)
  1. modules/uvr/music_separator.py +185 -0
modules/uvr/music_separator.py ADDED
@@ -0,0 +1,185 @@
+from typing import Optional, Union, List, Dict, Tuple
+import numpy as np
+import torchaudio
+import soundfile as sf
+import os
+import torch
+import gc
+import gradio as gr
+from datetime import datetime
+
+from uvr.models import MDX, Demucs, VrNetwork, MDXC
+from modules.utils.paths import DEFAULT_PARAMETERS_CONFIG_PATH, UVR_MODELS_DIR, UVR_OUTPUT_DIR
+from modules.utils.files_manager import load_yaml, save_yaml, is_video
+from modules.diarize.audio_loader import load_audio
+
+
+class MusicSeparator:
+    def __init__(self,
+                 model_dir: Optional[str] = UVR_MODELS_DIR,
+                 output_dir: Optional[str] = UVR_OUTPUT_DIR):
+        self.model = None
+        self.device = self.get_device()
+        self.available_devices = ["cpu", "cuda"]
+        self.model_dir = model_dir
+        self.output_dir = output_dir
+        instrumental_output_dir = os.path.join(self.output_dir, "instrumental")
+        vocals_output_dir = os.path.join(self.output_dir, "vocals")
+        os.makedirs(instrumental_output_dir, exist_ok=True)
+        os.makedirs(vocals_output_dir, exist_ok=True)
+        self.audio_info = None
+        self.available_models = ["UVR-MDX-NET-Inst_HQ_4", "UVR-MDX-NET-Inst_3"]
+        self.default_model = self.available_models[0]
+        self.current_model_size = self.default_model
+        self.model_config = {
+            "segment": 256,
+            "split": True
+        }
+
+    def update_model(self,
+                     model_name: str = "UVR-MDX-NET-Inst_1",
+                     device: Optional[str] = None,
+                     segment_size: int = 256):
+        """
+        Update the model with the given model name.
+
+        Args:
+            model_name (str): Model name.
+            device (str): Device to use for the model.
+            segment_size (int): Segment size for the prediction.
+        """
+        if device is None:
+            device = self.device
+
+        self.device = device
+        # Record the loaded model name so separate() can skip redundant reloads.
+        self.current_model_size = model_name
+        self.model_config = {
+            "segment": segment_size,
+            "split": True
+        }
+        self.model = MDX(name=model_name,
+                         other_metadata=self.model_config,
+                         device=self.device,
+                         logger=None,
+                         model_dir=self.model_dir)
+
+    def separate(self,
+                 audio: Union[str, np.ndarray],
+                 model_name: str,
+                 device: Optional[str] = None,
+                 segment_size: int = 256,
+                 save_file: bool = False,
+                 progress: gr.Progress = gr.Progress()) -> Tuple[np.ndarray, np.ndarray, List]:
+        """
+        Separate the background music from the audio.
+
+        Args:
+            audio (Union[str, np.ndarray]): Audio path or numpy array.
+            model_name (str): Model name.
+            device (str): Device to use for the model.
+            segment_size (int): Segment size for the prediction.
+            save_file (bool): Whether to save the separated audio to the output path or not.
+            progress (gr.Progress): Gradio progress indicator.
+
+        Returns:
+            A tuple of
+                np.ndarray: Instrumental numpy array.
+                np.ndarray: Vocals numpy array.
+                file_paths: List of file paths where the separated audio is saved. Empty when save_file is False.
+        """
+        # Normalize the device up front so the reload check below compares real
+        # values; comparing against None would force a model reload on every call.
+        if device is None:
+            device = self.device
+
+        if isinstance(audio, str):
+            output_filename, ext = os.path.basename(audio), ".wav"
+            output_filename, orig_ext = os.path.splitext(output_filename)
+
+            if is_video(audio):
+                audio = load_audio(audio)
+                sample_rate = 16000
+            else:
+                self.audio_info = torchaudio.info(audio)
+                sample_rate = self.audio_info.sample_rate
+        else:
+            timestamp = datetime.now().strftime("%m%d%H%M%S")
+            output_filename, ext = f"UVR-{timestamp}", ".wav"
+            sample_rate = 16000
+
+        model_config = {
+            "segment": segment_size,
+            "split": True
+        }
+
+        if (self.model is None or
+                self.current_model_size != model_name or
+                self.model_config != model_config or
+                self.model.sample_rate != sample_rate or
+                self.device != device):
+            progress(0, desc="Initializing UVR Model..")
+            self.update_model(
+                model_name=model_name,
+                device=device,
+                segment_size=segment_size
+            )
+            self.model.sample_rate = sample_rate
+
+        progress(0, desc="Separating background music from the audio.. "
+                         "(It will only display 0% until the job is complete.)")
+        result = self.model(audio)
+        instrumental, vocals = result["instrumental"].T, result["vocals"].T
+
+        file_paths = []
+        if save_file:
+            instrumental_output_path = os.path.join(self.output_dir, "instrumental", f"{output_filename}-instrumental{ext}")
+            vocals_output_path = os.path.join(self.output_dir, "vocals", f"{output_filename}-vocals{ext}")
+            sf.write(instrumental_output_path, instrumental, sample_rate, format="WAV")
+            sf.write(vocals_output_path, vocals, sample_rate, format="WAV")
+            file_paths += [instrumental_output_path, vocals_output_path]
+
+        return instrumental, vocals, file_paths
+
+    def separate_files(self,
+                       files: List,
+                       model_name: str,
+                       device: Optional[str] = None,
+                       segment_size: int = 256,
+                       save_file: bool = True,
+                       progress: gr.Progress = gr.Progress()) -> List[str]:
+        """Separate the background music from the audio files. Returns only the last
+        instrumental and vocals file paths, to be displayed in gr.Audio()."""
+        self.cache_parameters(model_size=model_name, segment_size=segment_size)
+
+        file_paths = []  # Keep file_paths bound even when files is empty.
+        for file_path in files:
+            instrumental, vocals, file_paths = self.separate(
+                audio=file_path,
+                model_name=model_name,
+                device=device,
+                segment_size=segment_size,
+                save_file=save_file,
+                progress=progress
+            )
+        return file_paths
+
+    @staticmethod
+    def get_device():
+        """Get the device for the model."""
+        return "cuda" if torch.cuda.is_available() else "cpu"
+
+    def offload(self):
+        """Offload the model and free up the memory."""
+        if self.model is not None:
+            del self.model
+            self.model = None
+        if self.device == "cuda":
+            torch.cuda.empty_cache()
+        gc.collect()
+        self.audio_info = None
+
+    @staticmethod
+    def cache_parameters(model_size: str,
+                         segment_size: int):
+        cached_params = load_yaml(DEFAULT_PARAMETERS_CONFIG_PATH)
+        cached_uvr_params = cached_params["bgm_separation"]
+        uvr_params_to_cache = {
+            "model_size": model_size,
+            "segment_size": segment_size
+        }
+        cached_uvr_params = {**cached_uvr_params, **uvr_params_to_cache}
+        cached_params["bgm_separation"] = cached_uvr_params
+        save_yaml(cached_params, DEFAULT_PARAMETERS_CONFIG_PATH)
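
For reviewers, a minimal usage sketch of the class this commit adds. The model name and directory defaults come from the diff above; "song.mp3" is a hypothetical input path, and the sketch assumes the default gr.Progress() is safe to call outside the Gradio UI:

from modules.uvr.music_separator import MusicSeparator

separator = MusicSeparator()  # uses the UVR_MODELS_DIR / UVR_OUTPUT_DIR defaults

# "song.mp3" is a hypothetical input file; the model name is the first entry
# of available_models in the diff above. With save_file=True, the results are
# written as <name>-instrumental.wav and <name>-vocals.wav under
# UVR_OUTPUT_DIR/instrumental and UVR_OUTPUT_DIR/vocals.
instrumental, vocals, file_paths = separator.separate(
    audio="song.mp3",
    model_name="UVR-MDX-NET-Inst_HQ_4",
    segment_size=256,
    save_file=True,
)
print(file_paths)

separator.offload()  # frees the model and, on CUDA, empties the GPU cache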