Spaces:
Runtime error
Runtime error
from pathlib import Path | |
from typing import Tuple | |
import yaml | |
import numpy as np | |
import audiotools as at | |
import argbind | |
import gradio as gr | |
from vampnet.interface import Interface | |
conf = yaml.safe_load(Path("conf/interface-jazzpop-exp.yml").read_text()) | |
Interface = argbind.bind(Interface) | |
AudioLoader = argbind.bind(at.data.datasets.AudioLoader) | |
with argbind.scope(conf): | |
interface = Interface() | |
loader = AudioLoader() | |
dataset = at.data.datasets.AudioDataset( | |
loader, | |
sample_rate=interface.codec.sample_rate, | |
duration=interface.coarse.chunk_size_s, | |
n_examples=5000, | |
without_replacement=True, | |
) | |
def load_audio(file): | |
print(file) | |
filepath = file.name | |
sig = at.AudioSignal.salient_excerpt( | |
filepath, | |
duration=interface.coarse.chunk_size_s | |
) | |
sig = interface.preprocess(sig) | |
audio = sig.samples.numpy()[0] | |
sr = sig.sample_rate | |
return sr, audio.T | |
def load_random_audio(): | |
index = np.random.randint(0, len(dataset)) | |
sig = dataset[index]["signal"] | |
sig = interface.preprocess(sig) | |
audio = sig.samples.numpy()[0] | |
sr = sig.sample_rate | |
return sr, audio.T | |
def mask_audio( | |
prefix_s, suffix_s, rand_mask_intensity, | |
mask_periodic_amt, beat_unmask_dur, | |
mask_dwn_chk, dwn_factor, | |
mask_up_chk, up_factor | |
): | |
pass | |
def vamp( | |
input_audio, prefix_s, suffix_s, rand_mask_intensity, | |
mask_periodic_amt, beat_unmask_dur, | |
mask_dwn_chk, dwn_factor, | |
mask_up_chk, up_factor | |
): | |
print(input_audio) | |
with gr.Blocks() as demo: | |
gr.Markdown('# Vampnet') | |
with gr.Row(): | |
# input audio | |
with gr.Column(): | |
gr.Markdown("## Input Audio") | |
manual_audio_upload = gr.File( | |
label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)", | |
file_types=["audio"] | |
) | |
load_random_audio_button = gr.Button("or load random audio") | |
input_audio = gr.Audio( | |
label="input audio", | |
interactive=False, | |
) | |
input_audio_viz = gr.HTML( | |
label="input audio", | |
) | |
# connect widgets | |
load_random_audio_button.click( | |
fn=load_random_audio, | |
inputs=[], | |
outputs=[ input_audio] | |
) | |
manual_audio_upload.change( | |
fn=load_audio, | |
inputs=[manual_audio_upload], | |
outputs=[ input_audio] | |
) | |
# mask settings | |
with gr.Column(): | |
gr.Markdown("## Mask Settings") | |
prefix_s = gr.Slider( | |
label="prefix length (seconds)", | |
minimum=0.0, | |
maximum=10.0, | |
value=0.0 | |
) | |
suffix_s = gr.Slider( | |
label="suffix length (seconds)", | |
minimum=0.0, | |
maximum=10.0, | |
value=0.0 | |
) | |
rand_mask_intensity = gr.Slider( | |
label="random mask intensity (lower means more freedom)", | |
minimum=0.0, | |
maximum=1.0, | |
value=1.0 | |
) | |
mask_periodic_amt = gr.Slider( | |
label="periodic unmasking factor (higher means more freedom)", | |
minimum=0, | |
maximum=32, | |
step=1, | |
value=2, | |
) | |
compute_mask_button = gr.Button("compute mask") | |
mask_output = gr.Audio( | |
label="masked audio", | |
interactive=False, | |
visible=False | |
) | |
mask_output_viz = gr.Video( | |
label="masked audio", | |
interactive=False | |
) | |
with gr.Column(): | |
gr.Markdown("## Beat Unmasking") | |
with gr.Accordion(label="beat unmask"): | |
beat_unmask_dur = gr.Slider( | |
label="duration", | |
minimum=0.0, | |
maximum=3.0, | |
value=0.1 | |
) | |
with gr.Accordion("downbeat settings"): | |
mask_dwn_chk = gr.Checkbox( | |
label="unmask downbeats", | |
value=True | |
) | |
dwn_factor = gr.Slider( | |
label="downbeat downsample factor (unmask every Nth downbeat)", | |
value=1, | |
minimum=1, | |
maximum=16, | |
step=1 | |
) | |
with gr.Accordion("upbeat settings"): | |
mask_up_chk = gr.Checkbox( | |
label="unmask upbeats", | |
value=True | |
) | |
up_factor = gr.Slider( | |
label="upbeat downsample factor (unmask every Nth upbeat)", | |
value=1, | |
minimum=1, | |
maximum=16, | |
step=1 | |
) | |
# process and output | |
with gr.Row(): | |
with gr.Column(): | |
vamp_button = gr.Button("vamp") | |
output_audio = gr.Audio( | |
label="output audio", | |
interactive=False, | |
visible=False | |
) | |
output_audio_viz = gr.Video( | |
label="output audio", | |
interactive=False | |
) | |
# connect widgets | |
compute_mask_button.click( | |
fn=mask_audio, | |
inputs=[ | |
prefix_s, suffix_s, rand_mask_intensity, | |
mask_periodic_amt, beat_unmask_dur, | |
mask_dwn_chk, dwn_factor, | |
mask_up_chk, up_factor | |
], | |
outputs=[mask_output, mask_output_viz] | |
) | |
# connect widgets | |
vamp_button.click( | |
fn=vamp, | |
inputs=[input_audio, | |
prefix_s, suffix_s, rand_mask_intensity, | |
mask_periodic_amt, beat_unmask_dur, | |
mask_dwn_chk, dwn_factor, | |
mask_up_chk, up_factor | |
], | |
outputs=[output_audio, output_audio_viz] | |
) | |
demo.launch(share=True) |