import os
import tempfile
import time

import numpy as np
import torch
from scipy.io import wavfile

import gradio as gr
from inference import EnsembleDemucsMDXMusicSeparationModel, predict_with_model


def check_file_readiness(filepath):
    """Block until *filepath* appears fully written, then return True.

    Polls the file size every 0.5 s and considers the file ready once
    five consecutive polls observe the same size (i.e. the writer has
    presumably stopped appending).

    NOTE(review): this loops forever if the file never stabilizes, and
    always returns True — callers cannot distinguish failure.
    """
    num_same_size_checks = 0
    last_size = -1
    while num_same_size_checks < 5:
        current_size = os.path.getsize(filepath)
        if current_size == last_size:
            num_same_size_checks += 1
        else:
            # Size changed: restart the stability count.
            num_same_size_checks = 0
        last_size = current_size
        time.sleep(0.5)
    return True


def separate_music_file_wrapper(uploaded_files, use_cpu, use_single_onnx,
                                large_overlap, small_overlap, chunk_size,
                                use_large_gpu):
    """Run source separation on the uploaded audio file.

    Parameters mirror the Gradio inputs: a single uploaded file object
    (``.name`` holds its temp path), CPU / single-ONNX / large-GPU flags,
    the large/small chunk overlaps (must lie in [0, 1]) and the chunk
    size (must be > 0).

    Returns a 6-tuple of output wav paths (vocals, instrumental,
    instrumental2, bass, drums, other). Any stem that was not produced
    is replaced by a freshly created 1-second silent stereo wav so the
    Gradio outputs always receive a valid file.

    Raises ValueError when the overlap or chunk-size validation fails.
    """
    # Path of the uploaded file (Gradio file object exposes .name).
    input_files = [uploaded_files.name]

    # Validate overlap values.
    if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
        raise ValueError("重叠值必须在0到1之间。")

    # Validate chunk size.
    if chunk_size <= 0:
        raise ValueError("块大小必须大于0。")

    options = {
        'input_audio': input_files,
        'output_folder': 'results',
        'cpu': use_cpu,
        'single_onnx': use_single_onnx,
        'overlap_large': large_overlap,
        'overlap_small': small_overlap,
        'chunk_size': chunk_size,
        'large_gpu': use_large_gpu,
    }

    predict_with_model(options)

    # Free GPU memory between runs.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Expected output stems, named <input>_<stem>.wav by the model.
    output_files = {}
    for f in input_files:
        audio_file_name = os.path.splitext(os.path.basename(f))[0]
        output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
        output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
        # Second instrumental variant.
        output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav")
        output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
        output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
        output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")

    # Wait for each stem to be fully written; substitute silence for
    # any stem the model did not produce.
    output_files_ready = []
    for k, v in output_files.items():
        if os.path.exists(v) and check_file_readiness(v):
            output_files_ready.append(v)
        else:
            # 1 second of stereo silence at 44100 Hz.
            empty_data = np.zeros((44100, 2))
            # NamedTemporaryFile(delete=False) instead of the deprecated,
            # race-prone tempfile.mktemp().
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                empty_file = tmp.name
            # Cast to int16: wavfile.write has no float32-silence default here.
            wavfile.write(empty_file, 44100, empty_data.astype(np.int16))
            output_files_ready.append(empty_file)

    print(len(output_files_ready))  # Should print 6.
    print("返回前")
    return tuple(output_files_ready)


description = """
# ZFTurbo Web-UI
Web-UI by [Ma5onic](https://github.com/Ma5onic)
## 选项:
- **仅使用CPU:** 如果GPU内存不足,请选择此项。速度会较慢。
- **使用单一ONNX模型:** 选择此项以使用单一ONNX模型。会稍微降低质量,但可以帮助减少GPU内存使用。
- **大块重叠:** 大块的重叠。根据需要调整。
- **小块重叠:** 小块的重叠。根据需要调整。
- **块大小:** 每次处理的块大小。如果遇到内存问题,请减少此值。
- **使用快速大GPU版本:** 选择此项以使用旧的快速方法,需要超过11GB的GPU内存。运行速度更快。
"""

theme = gr.themes.Base(
    primary_hue="cyan",
    secondary_hue="cyan",
)

with gr.Blocks(theme=theme) as demo:
    gr.Markdown(description)
    uploaded_file = gr.File(label="上传音频文件", type="file")
    use_cpu = gr.Checkbox(label="仅使用CPU", value=True)
    use_single_onnx = gr.Checkbox(label="使用单一ONNX模型", value=False)
    large_overlap = gr.Number(label="大块重叠", value=0.6)
    small_overlap = gr.Number(label="小块重叠", value=0.5)
    chunk_size = gr.Number(label="块大小", value=1000000)
    use_large_gpu = gr.Checkbox(label="使用快速大GPU版本", value=False)
    process_button = gr.Button("处理音频")
    vocals = gr.Audio(label="人声")
    instrumental = gr.Audio(label="伴奏")
    instrumental2 = gr.Audio(label="伴奏2")
    bass = gr.Audio(label="贝斯")
    drums = gr.Audio(label="鼓声")
    other = gr.Audio(label="其他")
    process_button.click(
        separate_music_file_wrapper,
        inputs=[uploaded_file, use_cpu, use_single_onnx, large_overlap,
                small_overlap, chunk_size, use_large_gpu],
        outputs=[vocals, instrumental, instrumental2, bass, drums, other],
    )

demo.queue().launch(debug=True, share=False)