Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,106 +1,65 @@
|
|
1 |
-
import os
|
2 |
-
import re
|
3 |
-
import random
|
4 |
-
from scipy.io.wavfile import write, read
|
5 |
-
import numpy as np
|
6 |
import gradio as gr
|
7 |
-
import
|
8 |
-
|
9 |
-
# Model dictionaries and lists
|
10 |
-
uvr5_models = {
|
11 |
-
'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
|
12 |
-
'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
|
13 |
-
'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
|
14 |
-
'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
|
15 |
-
'MDX23C-8KFFT-InstVoc_HQ.ckpt',
|
16 |
-
'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
|
17 |
-
'Kim_Vocal_1.onnx',
|
18 |
-
'Kim_Vocal_2.onnx',
|
19 |
-
'Kim_Inst.onnx',
|
20 |
-
'Reverb_HQ_By_FoxJoy.onnx',
|
21 |
-
'UVR-MDX-NET_Crowd_HQ_1.onnx',
|
22 |
-
'UVR-De-Echo-Aggressive.pth',
|
23 |
-
'UVR-De-Echo-Normal.pth',
|
24 |
-
'UVR-DeEcho-DeReverb.pth',
|
25 |
-
'UVR-DeNoise-Lite.pth',
|
26 |
-
'UVR-DeNoise.pth',
|
27 |
-
'UVR-BVE-4B_SN-44100-1.pth',
|
28 |
-
'htdemucs_ft.yaml',
|
29 |
-
'htdemucs.yaml',
|
30 |
-
'hdemucs_mmi.yaml',
|
31 |
-
}
|
32 |
-
|
33 |
|
|
|
|
|
|
|
34 |
|
35 |
-
#
|
|
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
#
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
'outtmpl': 'ytdl/%(title)s.%(ext)s',
|
47 |
-
'postprocessors': [{
|
48 |
-
'key': 'FFmpegExtractAudio',
|
49 |
-
'preferredcodec': 'wav',
|
50 |
-
'preferredquality': '192',
|
51 |
-
}],
|
52 |
-
}
|
53 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
54 |
-
info_dict = ydl.extract_info(url, download=True)
|
55 |
-
file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
|
56 |
-
sample_rate, audio_data = read(file_path)
|
57 |
-
audio_array = np.asarray(audio_data, dtype=np.int16)
|
58 |
-
return sample_rate, audio_array
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
write(f'{random_id}.wav', audio[0], audio[1])
|
66 |
-
full_roformer_model = uvr5_models[model]
|
67 |
-
|
68 |
-
prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={output_format} --normalization=0.9 --mdxc_overlap={overlap} --mdxc_segment_size={segment_size}"
|
69 |
-
if denoise:
|
70 |
-
prompt += " --mdx_enable_denoise"
|
71 |
-
|
72 |
-
os.system(prompt)
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
return
|
77 |
|
78 |
-
# Gradio interface
|
79 |
-
|
80 |
-
sample_rate, audio_array = download_audio(url)
|
81 |
-
stems = roformer_separator((sample_rate, audio_array), model, output_format, overlap, segment_size, denoise)
|
82 |
-
return stems
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
gr.Markdown("# Hex Audio Separator")
|
87 |
-
url_input = gr.Textbox(label="YouTube URL")
|
88 |
with gr.Row():
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
98 |
|
99 |
-
|
100 |
-
submit_button.click(
|
101 |
process_audio,
|
102 |
-
inputs=[
|
103 |
-
outputs=[
|
104 |
)
|
105 |
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import os
|
3 |
+
from audio_separator import Separator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
+
def separate_audio(input_file, output_dir, model_name):
    """Split *input_file* into stems with audio-separator and return their paths.

    Parameters:
        input_file: path of the audio file to separate.
        output_dir: directory the stem files are written to (created if missing).
        model_name: name of the separation model to use.

    Returns:
        list[str]: paths of the renamed stem files, in the order
        [main vocal, backing vocal, instrumental].
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Initialize the Separator and run the separation
    separator = Separator()
    separator.separate_audio_file(
        input_file,
        output_dir,
        model_name=model_name,
        denoise=True,
        output_format='wav',
        normalization_threshold=0.9,
        mdx_segment_size=256,
        mdx_overlap=8,
        primary_stem_only=False
    )

    # Rename the separator's outputs to the numbered names the UI expects.
    # NOTE(review): assumes the library writes exactly 'Vocals.wav',
    # 'Other.wav' and 'Instrumental.wav' — confirm against the installed
    # audio-separator version, which may embed model/input names in filenames.
    stem_map = [
        ('Vocals.wav', '1_main_vocal.wav'),
        ('Other.wav', '2_backing_vocal.wav'),
        ('Instrumental.wav', '3_instrumental.wav'),
    ]
    renamed = []
    for src_name, dst_name in stem_map:
        src = os.path.join(output_dir, src_name)
        dst = os.path.join(output_dir, dst_name)
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises FileExistsError on Windows when the numbered
        # files are left over from a previous run.
        os.replace(src, dst)
        renamed.append(dst)
    return renamed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
def process_audio(audio_file, model_name):
    """Gradio callback: separate *audio_file* and return the stem paths.

    Parameters:
        audio_file: value delivered by ``gr.Audio(type="filepath")`` — a plain
            string path in current Gradio; older versions passed a temp-file
            object exposing the path as ``.name``.
        model_name: separation model chosen in the dropdown.

    Returns:
        list[str]: [main vocal, backing vocal, instrumental] file paths.
    """
    # Bug fix: with type="filepath" Gradio passes a str, which has no .name
    # attribute — the original `audio_file.name` raised AttributeError.
    # Accept both the string path and the legacy file-object form.
    input_path = audio_file if isinstance(audio_file, str) else audio_file.name
    output_dir = "output"
    return separate_audio(input_path, output_dir, model_name)
|
39 |
|
40 |
+
# Build the Gradio interface and start serving it.
with gr.Blocks() as iface:
    gr.Markdown("# Audio Separator")
    with gr.Row():
        # Left column: input audio, model choice, and the trigger button.
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Input Audio")
            model_name = gr.Dropdown(
                label="Model",
                choices=["UVR-MDX-NET-Inst_HQ_3", "UVR_MDXNET_KARA_2", "UVR-MDX-NET-Inst_HQ_4"],
                value="UVR-MDX-NET-Inst_HQ_3",
            )
            submit_btn = gr.Button("Separate Audio")
        # Right column: one audio player per separated stem.
        with gr.Column():
            vocal_output = gr.Audio(label="Main Vocal")
            backing_vocal_output = gr.Audio(label="Backing Vocal")
            instrumental_output = gr.Audio(label="Instrumental")

    # Wire the button to the separation callback; outputs arrive in the
    # same order process_audio returns them.
    submit_btn.click(
        process_audio,
        inputs=[audio_input, model_name],
        outputs=[vocal_output, backing_vocal_output, instrumental_output],
    )

iface.launch()
|