File size: 2,464 Bytes
8bb1d29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import uuid

import gradio as gr
import torch
import torchaudio
from speechbrain.pretrained import SepformerSeparation as separator

class AudioDenoiser:
    """Speech enhancer built on SpeechBrain's pretrained SepFormer DNS4 checkpoint.

    The model is loaded once in ``__init__``; ``enhance_audio`` can then be
    called repeatedly (e.g. as a Gradio callback) to denoise individual files.
    """

    # Directory that receives the enhanced WAV files.
    OUTPUT_DIR = "enhanced_audio"
    # Sample rate (Hz) passed to torchaudio when saving the enhanced signal.
    SAMPLE_RATE = 16000

    def __init__(self):
        """Load the pretrained model and make sure the output directory exists."""
        # Initialize the SepFormer model for audio enhancement
        self.model = separator.from_hparams(
            source="speechbrain/sepformer-dns4-16k-enhancement",
            savedir='pretrained_models/sepformer-dns4-16k-enhancement'
        )

        # Create output directory if it doesn't exist
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

    @classmethod
    def _new_output_path(cls):
        """Return a fresh, collision-free WAV path inside ``OUTPUT_DIR``."""
        return os.path.join(cls.OUTPUT_DIR, f"enhanced_{uuid.uuid4().hex}.wav")

    def enhance_audio(self, audio_path):
        """
        Process the input audio file and return the enhanced version

        Args:
            audio_path (str): Path to the input audio file

        Returns:
            str: Path to the enhanced audio file

        Raises:
            gr.Error: If no input file was supplied or if enhancement/saving fails.
        """
        # The UI hands over None when the user submits without uploading;
        # report that directly instead of letting the model fail on it.
        if not audio_path:
            raise gr.Error("Error processing audio: no audio file was provided")

        try:
            # Separate and enhance the audio
            est_sources = self.model.separate_file(path=audio_path)

            # Re-create the directory in case it was removed after start-up.
            os.makedirs(self.OUTPUT_DIR, exist_ok=True)

            # Use a unique file per request: a single shared filename would let
            # overlapping requests overwrite each other's result.
            # NOTE: files accumulate in OUTPUT_DIR; clean up externally if needed.
            output_path = self._new_output_path()

            # Save the enhanced audio. Index 0 on the last axis picks the first
            # estimated source (presumably shape is (batch, time, sources) —
            # confirm against the SpeechBrain separation API).
            torchaudio.save(
                output_path,
                est_sources[:, :, 0].detach().cpu(),
                self.SAMPLE_RATE
            )

            return output_path

        except Exception as e:
            # Surface the failure in the Gradio UI while keeping the original
            # traceback attached for server-side debugging.
            raise gr.Error(f"Error processing audio: {str(e)}") from e

def create_gradio_interface():
    """Build the Gradio UI that wires an ``AudioDenoiser`` to audio in/out widgets.

    Returns:
        gr.Interface: The configured (not yet launched) interface.
    """
    # The model is loaded once here and shared by every request.
    enhancer = AudioDenoiser()

    # Input widget: hands the callback a path on disk rather than raw samples.
    noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")

    # Output widget: plays back the file path returned by the callback.
    enhanced_output = gr.Audio(label="Enhanced Audio")

    # Text shown under the title.
    description_text = """
        This application uses the SepFormer model from SpeechBrain to enhance audio quality
        by removing background noise. Upload any noisy audio file to get started.
        """

    # Text shown below the interface.
    article_text = """
        This application uses the SepFormer model trained on the DNS4 dataset.
        For more information, visit the [SpeechBrain documentation](https://speechbrain.github.io/).
        """

    return gr.Interface(
        fn=enhancer.enhance_audio,
        inputs=noisy_input,
        outputs=enhanced_output,
        title="Audio Denoising using SepFormer",
        description=description_text,
        article=article_text,
    )

if __name__ == "__main__":
    # Script entry point: build the interface and launch it in one step.
    create_gradio_interface().launch()