Blane187 committed on
Commit
96559f5
·
verified ·
1 Parent(s): eae55fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -92
app.py CHANGED
@@ -1,106 +1,65 @@
1
- import os
2
- import re
3
- import random
4
- from scipy.io.wavfile import write, read
5
- import numpy as np
6
  import gradio as gr
7
- import yt_dlp
8
-
9
# Model names exposed to the UI and passed to the separator CLI.
# NOTE(review): this was a set literal, but the rest of the file needs a
# stable ordering for the dropdown and indexes into it by model name — a
# list keeps the exact same member strings while being ordered and indexable.
uvr5_models = [
    'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
    'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
    'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
    'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
    'Kim_Vocal_1.onnx',
    'Kim_Vocal_2.onnx',
    'Kim_Inst.onnx',
    'Reverb_HQ_By_FoxJoy.onnx',
    'UVR-MDX-NET_Crowd_HQ_1.onnx',
    'UVR-De-Echo-Aggressive.pth',
    'UVR-De-Echo-Normal.pth',
    'UVR-DeEcho-DeReverb.pth',
    'UVR-DeNoise-Lite.pth',
    'UVR-DeNoise.pth',
    'UVR-BVE-4B_SN-44100-1.pth',
    'htdemucs_ft.yaml',
    'htdemucs.yaml',
    'hdemucs_mmi.yaml',
]

# More model lists...

# Discrete option values surfaced in the UI dropdowns.
output_format = ['wav', 'flac', 'mp3']
mdxnet_overlap_values = ['0.25', '0.5', '0.75', '0.99']
vrarch_window_size_values = ['320', '512', '1024']
demucs_overlap_values = ['0.25', '0.50', '0.75', '0.99']
 
 
 
 
 
 
 
 
41
 
42
# Function to download audio
def download_audio(url):
    """Fetch the best audio stream behind *url* as a WAV file.

    Downloads with yt-dlp (extracting audio via ffmpeg), then loads the
    resulting file and returns ``(sample_rate, int16 numpy array)``.
    """
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(url, download=True)
        # The post-processor swapped the container for WAV, so replace the
        # extension yt-dlp reports with '.wav'.
        wav_path = downloader.prepare_filename(info).rsplit('.', 1)[0] + '.wav'
        sample_rate, samples = read(wav_path)
        return sample_rate, np.asarray(samples, dtype=np.int16)
59
 
60
# Function to separate audio using Roformer
def roformer_separator(audio, model, output_format, overlap, segment_size, denoise):
    """Write *audio* to a temp WAV and split it with the audio-separator CLI.

    Parameters:
        audio: ``(sample_rate, samples)`` tuple as produced by download_audio.
        model: model file name (one of ``uvr5_models``) to separate with.
        output_format / overlap / segment_size / denoise: CLI tuning options.

    Returns the first three output stem paths produced for this run.
    """
    import subprocess  # local import: stdlib, used only by this function

    directory = "./outputs"
    random_id = str(random.randint(10000, 99999))
    os.makedirs("outputs", exist_ok=True)
    write(f'{random_id}.wav', audio[0], audio[1])

    # BUG FIX: `uvr5_models` is a set, so `uvr5_models[model]` raised
    # TypeError — the UI dropdown already supplies the model file name.
    full_roformer_model = model

    # SECURITY: build the command as an argument list and run it without a
    # shell, so user-influenced values cannot be interpreted as shell syntax
    # (the old f-string + os.system was injectable).
    command = [
        "audio-separator", f"{random_id}.wav",
        "--model_filename", full_roformer_model,
        "--output_dir=./outputs",
        f"--output_format={output_format}",
        "--normalization=0.9",
        f"--mdxc_overlap={overlap}",
        f"--mdxc_segment_size={segment_size}",
    ]
    if denoise:
        command.append("--mdx_enable_denoise")
    subprocess.run(command, shell=False)

    # Collect this run's stems; sort so the stem order is deterministic
    # (os.listdir order is arbitrary).
    files_list = sorted(
        os.path.join(directory, file)
        for file in os.listdir(directory)
        if re.search(random_id, file)
    )
    stem1_file, stem2_file, stem3_file = files_list[:3]  # assumes >= 3 stems — TODO confirm per model
    return stem1_file, stem2_file, stem3_file
77
 
78
# Gradio interface
def process_audio(url, model, output_format, overlap, segment_size, denoise):
    """Download the audio behind *url*, then run the Roformer separation on it."""
    sample_rate, audio_array = download_audio(url)
    return roformer_separator(
        (sample_rate, audio_array), model, output_format, overlap, segment_size, denoise
    )
83
 
84
# Gradio UI: URL in, three separated stems out.
with gr.Blocks() as demo:
    gr.Markdown("# Hex Audio Separator")
    url_input = gr.Textbox(label="YouTube URL")
    with gr.Row():
        # BUG FIX: `uvr5_models()` tried to CALL the collection, raising
        # TypeError as soon as the UI was built; iterate it instead.
        model_input = gr.Dropdown(choices=list(uvr5_models), label="Roformer Model")
        format_input = gr.Dropdown(choices=output_format, label="Output Format")
        overlap_input = gr.Dropdown(choices=mdxnet_overlap_values, label="Overlap")
        segment_input = gr.Slider(0, 100, label="Segment Size")
        denoise_input = gr.Checkbox(label="Enable Denoise")

    output1 = gr.Audio(label="Vocals")
    output2 = gr.Audio(label="Instrumental")
    output3 = gr.Audio(label="Backing Vocals")

    submit_button = gr.Button("Process")
    submit_button.click(
        process_audio,
        inputs=[url_input, model_input, format_input, segment_input, denoise_input] if False else [url_input, model_input, format_input, overlap_input, segment_input, denoise_input],
        outputs=[output1, output2, output3]
    )

demo.launch()
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ from audio_separator import Separator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
def separate_audio(input_file, output_dir, model_name):
    """Separate *input_file* into main-vocal / backing-vocal / instrumental stems.

    Parameters:
        input_file: path of the audio file to split.
        output_dir: directory the stems are written to (created if missing).
        model_name: separator model identifier passed to audio-separator.

    Returns the three expected stem paths (a missing stem still yields its
    expected path; the file simply won't exist).
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Initialize the Separator
    separator = Separator()

    # Separate the audio
    separator.separate_audio_file(
        input_file,
        output_dir,
        model_name=model_name,
        denoise=True,
        output_format='wav',
        normalization_threshold=0.9,
        mdx_segment_size=256,
        mdx_overlap=8,
        primary_stem_only=False
    )

    # Map the separator's stem files onto the names the UI expects.
    # NOTE(review): the exact file names audio-separator emits depend on the
    # library version and model (often they embed the input/model name) —
    # confirm 'Vocals.wav' / 'Other.wav' / 'Instrumental.wav' against the
    # installed package.
    stem_renames = {
        'Vocals.wav': '1_main_vocal.wav',
        'Other.wav': '2_backing_vocal.wav',
        'Instrumental.wav': '3_instrumental.wav',
    }
    for src_name, dst_name in stem_renames.items():
        src = os.path.join(output_dir, src_name)
        dst = os.path.join(output_dir, dst_name)
        # ROBUSTNESS: the old unconditional os.rename crashed with
        # FileNotFoundError when a stem was absent (or on a re-run after the
        # rename already happened). os.replace also overwrites a stale dst.
        if os.path.exists(src):
            os.replace(src, dst)

    return [
        os.path.join(output_dir, '1_main_vocal.wav'),
        os.path.join(output_dir, '2_backing_vocal.wav'),
        os.path.join(output_dir, '3_instrumental.wav')
    ]
 
 
 
 
 
 
 
 
35
 
36
def process_audio(audio_file, model_name):
    """Gradio callback: resolve the uploaded audio's path and separate it."""
    output_dir = "output"
    # BUG FIX: with gr.Audio(type="filepath") Gradio passes a plain path
    # string, and `str` has no `.name` attribute — the old `audio_file.name`
    # raised AttributeError. Accept both a path string and a file-like object.
    input_path = audio_file if isinstance(audio_file, str) else audio_file.name
    return separate_audio(input_path, output_dir, model_name)
39
 
40
# Define the Gradio interface: one input column (audio + model + button),
# one output column with the three separated stems.
with gr.Blocks() as iface:
    gr.Markdown("# Audio Separator")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Input Audio")
            model_name = gr.Dropdown(
                choices=["UVR-MDX-NET-Inst_HQ_3", "UVR_MDXNET_KARA_2", "UVR-MDX-NET-Inst_HQ_4"],
                label="Model",
                value="UVR-MDX-NET-Inst_HQ_3"
            )
            submit_btn = gr.Button("Separate Audio")
        with gr.Column():
            vocal_output = gr.Audio(label="Main Vocal")
            backing_vocal_output = gr.Audio(label="Backing Vocal")
            instrumental_output = gr.Audio(label="Instrumental")

    # Wire the button to the separation callback.
    submit_btn.click(
        process_audio,
        inputs=[audio_input, model_name],
        outputs=[vocal_output, backing_vocal_output, instrumental_output]
    )

iface.launch()