import gradio as gr
import numpy as np
import torch
import torchaudio
from denoisers import WaveUNetModel
from tqdm import tqdm

# Load the pretrained 24 kHz WaveUNet denoiser from the Hugging Face Hub.
MODEL = WaveUNetModel.from_pretrained("wrice/waveunet-vctk-24khz")

def denoise(inputs):
    # Gradio's "audio" input is a (sample_rate, int16 numpy array) tuple;
    # mono audio is assumed here.
    sr, audio = inputs

    # Add a leading dimension and scale int16 samples to [-1.0, 1.0] floats.
    audio = torch.from_numpy(audio)[None]
    audio = audio / 32768.0

    print(f"Audio shape: {audio.shape}")
    print(f"Sample rate: {sr}")

    # Resample to the model's native rate if the input differs.
    if sr != MODEL.config.sample_rate:
        audio = torchaudio.functional.resample(audio, sr, MODEL.config.sample_rate)

    # Zero-pad on the right so the length divides evenly into model-sized
    # chunks (a full extra chunk is added when it already divides; the
    # excess is trimmed after inference).
    chunk_size = MODEL.config.max_length
    padding = abs(audio.size(-1) % chunk_size - chunk_size)
    padded = torch.nn.functional.pad(audio, (0, padding))

    # Run the model one fixed-size chunk at a time to bound memory use.
    clean = []
    for i in tqdm(range(0, padded.shape[-1], chunk_size)):
        audio_chunk = padded[:, i : i + chunk_size]
        with torch.no_grad():
            clean_chunk = MODEL(audio_chunk[None]).logits
        clean.append(clean_chunk.squeeze(0))

    # Stitch chunks back together, trim the padding, and convert to int16.
    denoised = torch.concat(clean).flatten()[: audio.shape[-1]].clamp(-1.0, 1.0)
    denoised = (denoised * 32767.0).numpy().astype(np.int16)

    print(f"Denoised shape: {denoised.shape}")

    # Gradio expects (sample_rate, samples); return samples as a column vector.
    return MODEL.config.sample_rate, denoised[:, np.newaxis]


# Wire the denoiser into a simple audio-in / audio-out Gradio demo.
iface = gr.Interface(fn=denoise, inputs="audio", outputs="audio")
iface.launch()
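
# --- Optional local smoke test (a hypothetical sketch, not part of the app) ---
# Kept commented out because iface.launch() above blocks; run these lines in
# place of launch() to exercise denoise() directly. The 440 Hz tone plus
# Gaussian noise is only a stand-in for a real noisy recording.
#
# sr = 16000
# t = np.linspace(0.0, 1.0, sr, endpoint=False)
# noisy = 0.5 * np.sin(2 * np.pi * 440.0 * t) + 0.05 * np.random.randn(sr)
# noisy_int16 = (np.clip(noisy, -1.0, 1.0) * 32767).astype(np.int16)
# out_sr, clean = denoise((sr, noisy_int16))
# print(out_sr, clean.shape)  # model sample rate, (num_samples, 1)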