File size: 2,189 Bytes
fc81f0f
f939eb0
eb75e68
 
 
76842df
a29e651
eb75e68
76842df
 
 
 
 
eb75e68
6c4aae6
8c12fe6
fc81f0f
76842df
 
 
 
3a62ed4
f939eb0
 
eb75e68
3b84cc8
 
 
 
 
 
 
6c4aae6
3b84cc8
 
 
 
316bc64
3b84cc8
bcee150
3b84cc8
 
 
 
eb75e68
3b84cc8
 
eb75e68
3b84cc8
 
 
eb75e68
3b84cc8
 
eb75e68
3b84cc8
 
 
244d52c
3b84cc8
 
 
6c4aae6
3b84cc8
eb75e68
6c4aae6
46ea61b
 
f939eb0
 
46ea61b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""Gradio demo for denoisers."""

import gradio as gr
import torch
import torchaudio
from denoisers import UNet1DModel, WaveUNetModel
from tqdm import tqdm

# Hugging Face Hub checkpoint ids offered in the UI dropdown. An id
# containing "unet1d" is loaded with UNet1DModel; all others with
# WaveUNetModel (see the dispatch in denoise()).
MODELS = [
    "wrice/unet1d-vctk-48khz",
    "wrice/waveunet-vctk-48khz",
    "wrice/waveunet-vctk-24khz",
]


def denoise(model_name: str, audio_path: str) -> str:
    """Denoise an audio file with the selected pretrained model.

    Args:
        model_name: Hugging Face Hub id; ids containing "unet1d" load
            UNet1DModel, all others load WaveUNetModel.
        audio_path: Path to the noisy input file. Falsy values skip work.

    Returns:
        Path to the denoised file ("denoised.wav"), or None when no
        input path was provided.
    """
    # Guard clause: nothing to do without an input file.
    if not audio_path:
        return None

    if "unet1d" in model_name:
        model = UNet1DModel.from_pretrained(model_name)
    else:
        model = WaveUNetModel.from_pretrained(model_name)

    # Put the model in inference mode so dropout/batch-norm layers do not
    # behave as if training (the original omitted this).
    model = model.eval()

    # Query CUDA availability once and reuse the answer in the loop.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()

    # Stream the input in fixed-size chunks at the model's sample rate.
    stream_reader = torchaudio.io.StreamReader(audio_path)
    stream_reader.add_basic_audio_stream(
        frames_per_chunk=model.config.max_length,
        sample_rate=model.config.sample_rate,
        num_channels=1,
    )

    stream_writer = torchaudio.io.StreamWriter("denoised.wav")
    stream_writer.add_audio_stream(
        sample_rate=model.config.sample_rate, num_channels=1
    )

    chunk_size = model.config.max_length

    with stream_writer.open():
        for (audio_chunk,) in tqdm(stream_reader.stream()):
            if audio_chunk is None:
                break

            # Reader yields (frames, channels); the model consumes
            # (channels, frames).
            audio_chunk = audio_chunk.permute(1, 0)
            original_chunk_size = audio_chunk.size(-1)

            # Right-pad the final (short) chunk to the model's fixed length.
            if original_chunk_size < chunk_size:
                padding = chunk_size - original_chunk_size
                audio_chunk = torch.nn.functional.pad(audio_chunk, (0, padding))

            if use_cuda:
                audio_chunk = audio_chunk.cuda()

            with torch.no_grad():
                denoised_chunk = model(audio_chunk[None]).audio
                # Trim padding so the output length matches the input chunk.
                denoised_chunk = denoised_chunk[..., :original_chunk_size]

            # Back to (frames, channels) on CPU for the writer.
            stream_writer.write_audio_chunk(
                0, denoised_chunk.squeeze(0).permute(1, 0).cpu()
            )

    return "denoised.wav"


# Web UI: model picker + audio upload in, denoised audio file out.
iface = gr.Interface(
    fn=denoise,
    inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), gr.Audio(type="filepath")],
    outputs=gr.Audio(type="filepath"),
)

if __name__ == "__main__":
    # Only start the server when executed as a script, not on import.
    iface.launch()