File size: 1,285 Bytes
6c4aae6
 
eb75e68
 
 
 
 
6c4aae6
eb75e68
 
6c4aae6
eb75e68
 
 
 
 
6c4aae6
 
 
eb75e68
 
 
 
 
 
 
 
 
 
6c4aae6
eb75e68
 
 
 
6c4aae6
ab6d95c
6c4aae6
 
 
4887a61
eb75e68
6c4aae6
eb75e68
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from logging import getLogger

import gradio as gr
import torch
import torchaudio
from denoisers import WaveUNetModel

LOGGER = getLogger(__name__)
MODEL = WaveUNetModel.from_pretrained("wrice/waveunet-vctk-24khz")


def denoise(inputs):
    """Denoise a Gradio audio input and return it at the model's sample rate.

    Args:
        inputs: ``(sample_rate, waveform)`` tuple as produced by a Gradio
            ``"audio"`` input component; the waveform is assumed to be an
            int16 numpy array (hence the /32768 scaling) — TODO confirm
            against the Gradio component config.

    Returns:
        ``(sample_rate, waveform)`` tuple where ``sample_rate`` is the
        model's native rate and ``waveform`` is the denoised int16 array.
    """
    sr, audio = inputs
    # int16 PCM -> float in [-1, 1); [None] adds a channel dimension.
    audio = torch.from_numpy(audio)[None]
    audio = audio / 32768.0

    # Lazy %-formatting so the args are only rendered if the level is enabled.
    LOGGER.info("Audio shape: %s", audio.shape)
    LOGGER.info("Sample rate: %s", sr)

    if sr != MODEL.config.sample_rate:
        audio = torchaudio.functional.resample(audio, sr, MODEL.config.sample_rate)

    chunk_size = MODEL.config.max_length

    # Pad only up to the next chunk boundary; this is 0 when the length is
    # already a multiple of chunk_size.  The previous formula
    # abs(len % chunk - chunk) padded (and denoised) a full extra chunk in
    # that aligned case.
    padding = -audio.size(-1) % chunk_size
    padded = torch.nn.functional.pad(audio, (0, padding))

    clean = []
    for i in range(0, padded.shape[-1], chunk_size):
        audio_chunk = padded[:, i : i + chunk_size]
        with torch.no_grad():
            clean_chunk = MODEL(audio_chunk[None]).logits
        # NOTE(review): assumes logits are (batch, channels, time) so the
        # squeeze leaves (channels, time) — confirm against WaveUNetModel.
        clean.append(clean_chunk.squeeze(0))

    # Join chunks along the time axis.  The previous torch.concat(clean)
    # used dim=0, which stacked chunks as extra rows instead of extending
    # the waveform, breaking any input longer than one chunk (identical
    # result for a single chunk).  Trim the padding, then clamp to the
    # valid signal range.
    denoised = (
        torch.cat(clean, dim=-1)[:, : audio.shape[-1]].squeeze().clamp(-1.0, 1.0)
    )
    denoised = (denoised * 32767.0).numpy().astype("int16")

    LOGGER.info("Denoised shape: %s", denoised.shape)

    return MODEL.config.sample_rate, denoised


# Wire the denoiser into a minimal Gradio app: one audio input, one
# audio output, served with the default launch settings.
iface = gr.Interface(
    fn=denoise,
    inputs="audio",
    outputs="audio",
)
iface.launch()