"""Gradio demo for denoisers."""
import gradio as gr
import numpy as np
import torch
import torchaudio
from denoisers import WaveUNetModel
from tqdm import tqdm
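
# Pretrained WaveUNet checkpoints on the Hugging Face Hub; per the names,
# both were trained on VCTK, at 48 kHz and 24 kHz respectively.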
MODELS = ["wrice/waveunet-vctk-48khz", "wrice/waveunet-vctk-24khz"]


def main():
    """Launch the Gradio demo."""
    iface = gr.Interface(
        fn=denoise,
        inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), "audio"],
        outputs="audio",
    )
    iface.launch()
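
# Note: the "audio" shorthand above maps to a gr.Audio component, which passes
# the recording to fn as a (sample_rate, int16 numpy array) tuple and accepts
# the same kind of tuple back as the output.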


def denoise(model_name, inputs):
    """Denoise audio with the selected pretrained model."""
    model = WaveUNetModel.from_pretrained(model_name)
    sr, audio = inputs

    # Gradio delivers int16 PCM; add a leading channel dim and scale to [-1, 1].
    audio = torch.from_numpy(audio)[None]
    audio = audio / 32768.0
    print(f"Audio shape: {audio.shape}")
    print(f"Sample rate: {sr}")

    # Resample to the model's native sample rate if the input differs.
    if sr != model.config.sample_rate:
        audio = torchaudio.functional.resample(audio, sr, model.config.sample_rate)
    # Zero-pad on the right so the length is an exact multiple of the chunk size.
    chunk_size = model.config.max_length
    padding = -audio.size(-1) % chunk_size
    padded = torch.nn.functional.pad(audio, (0, padding))
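    # Worked example (hypothetical numbers): with chunk_size = 16384 and a
    # 40000-sample clip, padding = -40000 % 16384 = 9152, so the padded
    # length is 49152 = 3 * 16384 and the loop below runs three chunks.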
    # Run the model chunk by chunk so long clips don't exhaust memory.
    clean = []
    for i in tqdm(range(0, padded.shape[-1], chunk_size)):
        audio_chunk = padded[:, i : i + chunk_size]
        with torch.no_grad():
            clean_chunk = model(audio_chunk[None]).logits
        clean.append(clean_chunk.squeeze(0))

    # Stitch the chunks together, trim the padding, and convert back to int16 PCM.
    denoised = torch.concat(clean).flatten()[: audio.shape[-1]].clamp(-1.0, 1.0)
    denoised = (denoised * 32767.0).numpy().astype(np.int16)
    print(f"Denoised shape: {denoised.shape}")
    return model.config.sample_rate, denoised
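
# A minimal sketch of calling denoise() directly, without the Gradio UI.
# "noisy.wav" is a hypothetical input path; this assumes torchaudio can decode
# the file and that it is mono:
#
#   waveform, sr = torchaudio.load("noisy.wav")  # float32 in [-1, 1]
#   pcm16 = (waveform[0].numpy() * 32767.0).astype(np.int16)
#   out_sr, clean = denoise(MODELS[0], (sr, pcm16))
#   torchaudio.save("clean.wav", torch.from_numpy(clean)[None] / 32768.0, out_sr)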


if __name__ == "__main__":
    main()