"""Gradio demo for denoisers."""
import gradio as gr
import numpy as np
import torch
import torchaudio
from denoisers import UNet1DModel, WaveUNetModel
from tqdm import tqdm
# Hugging Face Hub ids of the pretrained denoiser checkpoints offered in the
# dropdown. The "unet1d" / "waveunet" part of the id selects which model class
# is loaded in denoise(); the suffix names the training corpus and sample rate.
MODELS = [
    "wrice/unet1d-vctk-48khz",
    "wrice/waveunet-vctk-48khz",
    "wrice/waveunet-vctk-24khz",
]
def denoise(model_name, inputs):
    """Denoise audio with the selected pretrained model.

    Args:
        model_name: Hugging Face Hub model id. Ids containing "unet1d" load
            ``UNet1DModel``; anything else loads ``WaveUNetModel``.
        inputs: ``(sample_rate, samples)`` tuple as produced by a Gradio audio
            input; ``samples`` is int16, shaped ``(T,)`` for mono or
            ``(T, C)`` for multi-channel.

    Returns:
        ``(model_sample_rate, denoised)`` where ``denoised`` is int16 with
        shape ``(T, 1)``, suitable for a Gradio audio output.
    """
    if "unet1d" in model_name:
        model = UNet1DModel.from_pretrained(model_name)
    else:
        model = WaveUNetModel.from_pretrained(model_name)

    sr, audio = inputs
    audio = torch.from_numpy(audio)
    # Gradio delivers int16 PCM; scale to [-1, 1).
    audio = audio / 32768.0

    if audio.ndim == 1:
        # (T,) -> (1, T)
        audio = audio.unsqueeze(0)
    else:
        # Gradio multi-channel audio is (T, C); put channels first so the
        # downmix below averages channels, not samples.
        audio = audio.transpose(0, 1)
    print(f"Audio shape: {audio.shape}")
    print(f"Sample rate: {sr}")

    if audio.shape[0] > 1:
        # Downmix to mono by averaging channels.
        audio = audio.mean(0, keepdim=True)
        print(f"Audio shape: {audio.shape}")

    if sr != model.config.sample_rate:
        audio = torchaudio.functional.resample(audio, sr, model.config.sample_rate)

    chunk_size = model.config.max_length
    # Pad up to a whole number of chunks. (-len) % chunk is 0 when the length
    # is already aligned; the previous abs(len % chunk - chunk) padded a full
    # extra chunk in that case.
    padding = -audio.shape[-1] % chunk_size
    padded = torch.nn.functional.pad(audio, (0, padding))
    # (1, T) -> (1, 1, T): add the batch dim the model expects; without it the
    # three-axis chunk indexing below raises IndexError.
    padded = padded.unsqueeze(0)

    clean = []
    for i in tqdm(range(0, padded.shape[-1], chunk_size)):
        audio_chunk = padded[:, :, i : i + chunk_size]
        with torch.no_grad():
            clean_chunk = model(audio_chunk).audio
        clean.append(clean_chunk.squeeze(0))

    # Stitch chunks back together, drop the padding, and clamp to valid range.
    denoised = torch.concat(clean, 1)[:, : audio.shape[-1]].clamp(-1.0, 1.0)
    denoised = (denoised * 32767.0).numpy().astype(np.int16)
    print(f"Denoised shape: {denoised.shape}")

    # Transpose back to (T, C), the layout Gradio's audio output expects.
    return model.config.sample_rate, denoised.transpose()
# Wire the demo UI: a model picker plus an audio upload feeding denoise(),
# with a single audio player as output.
demo_inputs = [
    gr.Dropdown(choices=MODELS, value=MODELS[0]),
    "audio",
]
iface = gr.Interface(fn=denoise, inputs=demo_inputs, outputs="audio")
iface.launch()