File size: 1,391 Bytes
8bb1d29
 
a097442
1594fe6
518eabe
8bb1d29
a097442
 
 
251f2af
a097442
 
 
 
518eabe
 
 
 
a097442
518eabe
a097442
 
1e246dd
a097442
518eabe
a097442
 
0720448
518eabe
 
 
a097442
8bb1d29
a097442
 
 
 
 
 
 
 
8bb1d29
1e246dd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
import tempfile

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.separation import SepformerSeparation as separator

# Instantiate the pretrained SepFormer enhancement model once, at module load,
# so every request handled by this process shares the same instance.
MODEL_SOURCE = "speechbrain/sepformer-dns4-16k-enhancement"
# Directory handed to SpeechBrain as ``savedir`` for the model's files.
MODEL_SAVEDIR = "pretrained_models/sepformer-dns4-16k-enhancement"
model = separator.from_hparams(source=MODEL_SOURCE, savedir=MODEL_SAVEDIR)

# Define the enhancement function
def enhance_audio(noisy_audio):
    """Denoise an uploaded recording and return the path of the enhanced WAV.

    Args:
        noisy_audio: Filesystem path of the uploaded audio, as delivered by
            the ``gr.Audio(type="filepath")`` input. It is ``None`` (or empty)
            when the user submits without providing any audio.

    Returns:
        Path of a newly created WAV file, written at 16000 Hz, that contains
        the enhanced signal. Each call produces its own uniquely named file.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not noisy_audio:
        # Fail with a message the UI can show instead of an opaque loader error.
        raise gr.Error("Please upload an audio file before submitting.")

    # Work inside a per-request scratch directory: concurrent requests can no
    # longer overwrite each other's intermediate file, and the directory is
    # removed even if loading or conversion raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        # Re-encode the upload (e.g. MP3) as WAV before handing it to the model.
        wav_audio = os.path.join(scratch_dir, "temp_audio.wav")
        waveform, sample_rate = torchaudio.load(noisy_audio)
        torchaudio.save(wav_audio, waveform, sample_rate)

        # Load the signal and add a batch dimension.
        # NOTE(review): this assumes load_audio returns a 1-D mono signal at
        # the model's sample rate -- confirm against the SpeechBrain docs.
        noisy = model.load_audio(wav_audio).unsqueeze(0)

    # SepformerSeparation is driven through ``separate_batch``; the
    # ``enhance_batch`` method belongs to SpeechBrain's *Enhancement classes.
    # The result is indexed as (batch, time, source); this enhancement
    # checkpoint yields a single source, so take index 0.
    with torch.no_grad():
        est_sources = model.separate_batch(noisy)
    enhanced = est_sources[:, :, 0].detach().cpu()

    # Write to a unique file that outlives this call so Gradio can read it
    # after we return. Only the path is needed, so close the descriptor.
    fd, enhanced_path = tempfile.mkstemp(prefix="enhanced_", suffix=".wav")
    os.close(fd)
    torchaudio.save(enhanced_path, enhanced, 16000)

    return enhanced_path

# Build the UI: a single audio upload in, a single audio player out. Both
# components exchange file paths with ``enhance_audio`` rather than raw arrays.
noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
enhanced_output = gr.Audio(type="filepath", label="Enhanced Audio")

interface = gr.Interface(
    title="Speech Enhancement App",
    description=(
        "Upload a noisy audio file to enhance the quality. "
        "The enhanced audio can be downloaded after processing."
    ),
    fn=enhance_audio,
    inputs=noisy_input,
    outputs=enhanced_output,
)

# Start serving; ``share=True`` additionally requests a public share link.
interface.launch(share=True)