"""Gradio demo for denoisers."""
import gradio as gr
import numpy as np
import torch
import torchaudio
from denoisers import WaveUNetModel
from tqdm import tqdm

MODELS = ["wrice/waveunet-vctk-48khz", "wrice/waveunet-vctk-24khz"]


def main():
    """Main."""
    iface = gr.Interface(
        fn=denoise,
        inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), "audio"],
        outputs="audio",
    )
    iface.launch()


def denoise(model_name, inputs):
    """Denoise audio."""
    model = WaveUNetModel.from_pretrained(model_name)
    model.eval()
    sr, audio = inputs
    audio = torch.from_numpy(audio)
    if audio.ndim == 2:
        # Gradio passes stereo audio as (samples, channels); downmix to mono.
        audio = audio.float().mean(dim=-1)
    # Add a batch dimension and scale int16 samples to [-1.0, 1.0].
    audio = audio[None] / 32768.0

    print(f"Audio shape: {audio.shape}")
    print(f"Sample rate: {sr}")

    # Resample to the model's native sample rate if needed.
    if sr != model.config.sample_rate:
        audio = torchaudio.functional.resample(audio, sr, model.config.sample_rate)

    chunk_size = model.config.max_length

    # Zero-pad on the right so the length is an exact multiple of chunk_size.
    padding = -audio.size(-1) % chunk_size
    padded = torch.nn.functional.pad(audio, (0, padding))
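    # Example: with chunk_size=16_384 (a hypothetical value), a 50_000-sample
    # clip needs 15_536 zeros of padding (to 65_536 samples = 4 full chunks);
    # the padding is trimmed off again after inference below.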

    # Denoise chunk by chunk so memory use stays bounded for long clips.
    clean = []
    for i in tqdm(range(0, padded.shape[-1], chunk_size)):
        audio_chunk = padded[:, i : i + chunk_size]
        with torch.no_grad():
            clean_chunk = model(audio_chunk[None]).logits
        clean.append(clean_chunk.squeeze(0))

    # Stitch the chunks back together, trim the padding, and convert to int16.
    denoised = torch.concat(clean).flatten()[: audio.shape[-1]].clamp(-1.0, 1.0)
    denoised = (denoised * 32767.0).numpy().astype(np.int16)

    print(f"Denoised shape: {denoised.shape}")

    return model.config.sample_rate, denoised


if __name__ == "__main__":
    main()
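

# A minimal sketch (assumed usage, not part of the original demo) of calling
# denoise() directly without launching the Gradio UI. The synthetic 440 Hz
# tone is purely illustrative; any (sample_rate, int16 ndarray) pair in
# Gradio's "audio" format should work:
#
#     sr = 16_000
#     t = np.arange(sr) / sr
#     noisy = (0.5 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
#     out_sr, clean = denoise(MODELS[0], (sr, noisy))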