"""HARP (pyharp) / Gradio endpoint that generates AudioLDM style-transfer variations."""

import os

from pyharp import ModelCard, build_endpoint
from audiotools import AudioSignal
from audioldm import build_model, style_transfer
import gradio as gr
import soundfile as sf

# Sample rate used for every file this script writes.
SAMPLE_RATE = 16000

# Fixed location of the file returned to the Gradio/HARP client.
OUTPUT_PATH = "./output.wav"

# Built once at import time; every request reuses this model instance.
audioldm = build_model(model_name="audioldm-m-full")


def save_wave(waveform, savepath, name="outwav", samplerate=SAMPLE_RATE):
    """Write every item of a waveform batch to ``<savepath>/<stem>_<index>.wav``.

    Args:
        waveform: Array-like indexed as ``waveform[i, 0]``; presumably shaped
            (batch, 1, samples) -- TODO confirm against ``style_transfer``.
        savepath: Directory the files are written into.
        name: Either one base name used for every item, or a list holding one
            name per item. If a name contains ".wav", only the part of its
            basename before the first "." is kept as the stem.
        samplerate: Sample rate passed to ``soundfile.write``.
    """
    if not isinstance(name, list):
        name = [name] * waveform.shape[0]

    for i in range(waveform.shape[0]):
        stem = os.path.basename(name[i])
        if ".wav" in name[i]:
            stem = stem.split(".")[0]
        path = os.path.join(savepath, "%s_%s.wav" % (stem, i))
        print("Save audio to %s" % path)
        sf.write(path, waveform[i, 0], samplerate=samplerate)


def process_fn(input_audio_path, prompt, seed, guidance_scale, num_inference_steps,
               num_candidates, audio_length_in_s, transfer_strength):
    """Run AudioLDM style transfer on one audio file and return the output path.

    The parameters arrive positionally from the Gradio components listed in
    ``inputs`` below, so the two orderings must be kept in sync.

    Args:
        input_audio_path: Path of the audio file to transform.
        prompt: Text prompt passed to ``style_transfer``.
        seed: Random seed; cast to ``int`` because sliders may deliver floats.
        guidance_scale: Forwarded as ``guidance_scale``.
        num_inference_steps: Forwarded as ``ddim_steps`` (cast to ``int``).
        num_candidates: Forwarded as ``batchsize`` (cast to ``int``).
        audio_length_in_s: Forwarded as ``duration``.
        transfer_strength: Forwarded as the transfer strength.

    Returns:
        The path of the written WAV file (``OUTPUT_PATH``).
    """
    waveform = style_transfer(
        audioldm,
        prompt,
        input_audio_path,  # was the undefined name `audio_file`
        transfer_strength,
        int(seed),
        duration=audio_length_in_s,
        guidance_scale=guidance_scale,
        ddim_steps=int(num_inference_steps),
        batchsize=int(num_candidates),
        config=None,
    )
    waveform = waveform[:, None, :]

    # save_wave() is deliberately not used here: it appends the batch index to
    # the file name ("output_0.wav"), while the endpoint must return one fixed
    # path. Only the first candidate is written; any others are discarded.
    sf.write(OUTPUT_PATH, waveform[0, 0], samplerate=SAMPLE_RATE)
    return OUTPUT_PATH


def batch_style_transfer(input_files, prompt, transfer_strength, seed, duration,
                         guidance_scale, steps, n_variations, output_directory):
    """Apply style transfer to several files and save every variation.

    This was previously a module-level loop placed after ``webapp.launch`` that
    relied on names defined nowhere in the file; its settings are now explicit
    parameters and nothing calls it automatically.

    Args:
        input_files: Iterable of audio file paths to process.
        prompt: Text prompt passed to ``style_transfer``.
        transfer_strength: Forwarded as the transfer strength.
        seed: Random seed.
        duration: Forwarded as ``duration``.
        guidance_scale: Forwarded as ``guidance_scale``.
        steps: Forwarded as ``ddim_steps``.
        n_variations: Forwarded as ``batchsize``.
        output_directory: Directory handed to ``save_wave``.
    """
    for audio_file in input_files:
        waveform = style_transfer(
            audioldm,
            prompt,
            audio_file,
            transfer_strength,
            seed,
            duration=duration,
            guidance_scale=guidance_scale,
            ddim_steps=steps,
            batchsize=n_variations,
            config=None,
        )
        waveform = waveform[:, None, :]
        save_wave(
            waveform,
            output_directory,
            name=f"{os.path.basename(audio_file)}_STYLE_{prompt}",
        )


card = ModelCard(
    name='AudioLDM Variations',
    description='AudioLDM Variation Generator, operates on region selected in track.',
    author='Team Audio',
    tags=['AudioLDM', 'Variations', 'audio-to-audio']
)

with gr.Blocks() as webapp:
    # One component per process_fn parameter, in the same order.
    inputs = [
        gr.Audio(
            label="Audio Input",
            type="filepath"
        ),
        # Added: process_fn expects a prompt but no control supplied one.
        gr.Textbox(
            label="Prompt"
        ),
        # Numeric bounds (the original passed them as strings).
        gr.Slider(
            label="seed",
            minimum=0,
            maximum=65535,
            value=43534,
            step=1
        ),
        gr.Slider(
            minimum=0, maximum=10,
            step=0.1, value=2.5,
            label="Guidance Scale"
        ),
        gr.Slider(
            minimum=1, maximum=500,
            step=1, value=200,
            label="Inference Steps"
        ),
        gr.Slider(
            minimum=1, maximum=10,
            step=1, value=1,
            label="Candidates"
        ),
        gr.Slider(
            minimum=2.5, maximum=10.0,
            step=2.5, value=5,
            label="Duration"
        ),
        # Added: process_fn expects a transfer strength as its last argument.
        # NOTE(review): range 0-1 and default 0.5 are assumed -- confirm
        # against the AudioLDM style_transfer documentation.
        gr.Slider(
            minimum=0.0, maximum=1.0,
            step=0.05, value=0.5,
            label="Transfer Strength"
        ),
    ]

    output = gr.Audio(label="Audio Output", type="filepath")

    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(
        inputs, output, process_fn, card
    )

# Queueing is currently disabled; see
# https://www.gradio.app/guides/setting-up-a-demo-for-maximum-performance
#webapp.queue()
webapp.launch(share=True)