Spaces:
Sleeping
Sleeping
File size: 2,094 Bytes
fc81f0f f939eb0 eb75e68 a29e651 eb75e68 76842df a29e651 eb75e68 76842df eb75e68 6c4aae6 f939eb0 fc81f0f 76842df 3a62ed4 f939eb0 eb75e68 f939eb0 6c4aae6 f939eb0 316bc64 f939eb0 bcee150 f939eb0 eb75e68 f939eb0 eb75e68 f939eb0 eb75e68 f939eb0 eb75e68 f939eb0 244d52c f939eb0 6c4aae6 f939eb0 eb75e68 6c4aae6 46ea61b f939eb0 46ea61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
"""Gradio demo for denoisers."""
import tempfile
from pathlib import Path
import gradio as gr
import numpy as np
import torch
import torchaudio
from denoisers import UNet1DModel, WaveUNetModel
from tqdm import tqdm
# Pretrained checkpoint identifiers offered in the UI dropdown. The selected
# entry is passed to ``from_pretrained`` inside ``denoise``; entries whose name
# contains "unet1d" are loaded as ``UNet1DModel``, all others as
# ``WaveUNetModel``. The first entry is the dropdown's default value.
MODELS = [
    "wrice/unet1d-vctk-48khz",
    "wrice/waveunet-vctk-48khz",
    "wrice/waveunet-vctk-24khz",
]
def denoise(model_name: str, audio_path: str) -> str:
    """Denoise an audio file with a pretrained model and return the output path.

    The input is decoded as single-channel audio at the model's configured
    sample rate and processed in chunks of ``model.config.max_length`` frames,
    so the whole recording is never held in memory at once. Each request
    writes to its own temporary file, so overlapping requests cannot overwrite
    one another's result.

    Args:
        model_name: Identifier passed to ``from_pretrained``. Names containing
            ``"unet1d"`` are loaded as ``UNet1DModel``; anything else is loaded
            as ``WaveUNetModel``.
        audio_path: Location of the audio to denoise, handed to
            ``torchaudio.io.StreamReader``.

    Returns:
        Path of a newly created temporary ``.wav`` file containing the
        denoised audio. The file is not removed automatically.

    Raises:
        gr.Error: If ``audio_path`` is empty or ``None`` (no audio provided).
    """
    if not audio_path:
        # The UI submits None when no audio was uploaded/recorded; report that
        # clearly instead of failing inside the decoder with an opaque error.
        raise gr.Error("Please provide an audio file to denoise.")

    model_cls = UNet1DModel if "unet1d" in model_name else WaveUNetModel
    model = model_cls.from_pretrained(model_name)

    # Query CUDA availability once rather than on every chunk.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()

    chunk_size = model.config.max_length
    sample_rate = model.config.sample_rate

    stream_reader = torchaudio.io.StreamReader(audio_path)
    stream_reader.add_basic_audio_stream(
        frames_per_chunk=chunk_size,
        sample_rate=sample_rate,
        num_channels=1,
    )

    # Reserve a unique output path. The handle is closed immediately so the
    # stream writer can open the file itself; delete=False keeps it on disk
    # for the caller to read.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        output_path = tmp_file.name

    stream_writer = torchaudio.io.StreamWriter(output_path)
    stream_writer.add_audio_stream(sample_rate=sample_rate, num_channels=1)

    with stream_writer.open():
        for (audio_chunk,) in tqdm(stream_reader.stream()):
            if audio_chunk is None:
                break

            # Swap the two axes so the chunk is laid out the way the model is
            # called below (presumably (channels, frames) — the reader yields
            # frames first).
            audio_chunk = audio_chunk.permute(1, 0)
            original_chunk_size = audio_chunk.size(-1)

            # The final chunk is usually short; zero-pad it on the right up to
            # the fixed chunk length before running the model.
            if original_chunk_size < chunk_size:
                padding = chunk_size - original_chunk_size
                audio_chunk = torch.nn.functional.pad(audio_chunk, (0, padding))

            if use_cuda:
                audio_chunk = audio_chunk.cuda()

            with torch.no_grad():
                # [None] adds a leading batch dimension of size 1.
                denoised_chunk = model(audio_chunk[None]).audio

            # Trim the padding back off so the output length matches the input.
            denoised_chunk = denoised_chunk[:, :, :original_chunk_size]

            # Drop the batch dimension and restore the reader's axis order
            # before writing to output stream 0.
            stream_writer.write_audio_chunk(
                0, denoised_chunk.squeeze(0).permute(1, 0).cpu()
            )

    return output_path
# Input widgets: a dropdown to pick the checkpoint (defaulting to the first
# entry of MODELS) and an audio input delivered to ``denoise`` as a file path.
_model_choice = gr.Dropdown(choices=MODELS, value=MODELS[0])
_input_audio = gr.Audio(type="filepath")

# Output widget: plays back the file whose path ``denoise`` returns.
_output_audio = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=denoise,
    inputs=[_model_choice, _input_audio],
    outputs=_output_audio,
)

# Start the web app as soon as the module is executed.
iface.launch()
|