lyimo's picture
Create app.py
8bb1d29 verified
raw
history blame
2.46 kB
import os
import uuid

import gradio as gr
import torch
import torchaudio
from speechbrain.pretrained import SepformerSeparation as separator
class AudioDenoiser:
    """Speech enhancer built on the SpeechBrain SepFormer DNS4 16k model.

    The pretrained model is loaded once in the constructor; each call to
    ``enhance_audio`` then writes its result to a fresh file inside
    ``OUTPUT_DIR``.
    """

    # Pretrained model identifier and the local directory it is stored in.
    MODEL_SOURCE = "speechbrain/sepformer-dns4-16k-enhancement"
    MODEL_SAVEDIR = "pretrained_models/sepformer-dns4-16k-enhancement"
    # Directory that receives the enhanced audio files.
    OUTPUT_DIR = "enhanced_audio"
    # Sample rate used when saving; presumably matches the "16k" model.
    SAMPLE_RATE = 16000

    def __init__(self):
        """Load the SepFormer model and make sure the output directory exists."""
        self.model = separator.from_hparams(
            source=self.MODEL_SOURCE,
            savedir=self.MODEL_SAVEDIR,
        )
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

    def _new_output_path(self):
        """Return a unique ``.wav`` path inside ``OUTPUT_DIR``.

        A random name per call keeps simultaneous or back-to-back requests
        from overwriting each other's result, which a single fixed file name
        would allow.
        NOTE(review): files accumulate in ``OUTPUT_DIR``; add periodic
        cleanup if disk usage becomes a concern.
        """
        return os.path.join(self.OUTPUT_DIR, f"enhanced_{uuid.uuid4().hex}.wav")

    def enhance_audio(self, audio_path):
        """
        Process the input audio file and return the enhanced version

        Args:
            audio_path (str): Path to the input audio file

        Returns:
            str: Path to the enhanced audio file

        Raises:
            gr.Error: If no audio was provided or if any step of the
                enhancement fails; the original exception is chained.
        """
        # Submitting the form without an upload passes an empty value here;
        # report that clearly instead of surfacing an obscure library error.
        if not audio_path:
            raise gr.Error("Error processing audio: no audio file was provided")

        try:
            # Separate and enhance the audio
            est_sources = self.model.separate_file(path=audio_path)

            output_path = self._new_output_path()

            # Keep only index 0 of the last axis (presumably the single
            # enhanced source) and move it to the CPU before saving.
            torchaudio.save(
                output_path,
                est_sources[:, :, 0].detach().cpu(),
                self.SAMPLE_RATE,
            )
            return output_path
        except Exception as e:
            # Top-level UI boundary: surface every failure in the Gradio UI
            # while preserving the original traceback for the server logs.
            raise gr.Error(f"Error processing audio: {str(e)}") from e
def create_gradio_interface():
    """Assemble the Gradio interface for the audio denoiser.

    Returns:
        gr.Interface: The configured interface wired to
        ``AudioDenoiser.enhance_audio``; it is not launched here.
    """
    # Constructing the denoiser loads the model, so do it once up front.
    audio_denoiser = AudioDenoiser()

    # Input arrives as a path on disk; output is an audio player.
    noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
    enhanced_output = gr.Audio(label="Enhanced Audio")

    # Text shown above the components.
    description_text = (
        "\n"
        "This application uses the SepFormer model from SpeechBrain to enhance audio quality\n"
        "by removing background noise. Upload any noisy audio file to get started.\n"
    )
    # Text shown below the components.
    article_text = (
        "\n"
        "This application uses the SepFormer model trained on the DNS4 dataset.\n"
        "For more information, visit the [SpeechBrain documentation](https://speechbrain.github.io/).\n"
    )

    return gr.Interface(
        fn=audio_denoiser.enhance_audio,
        inputs=noisy_input,
        outputs=enhanced_output,
        title="Audio Denoising using SepFormer",
        description=description_text,
        article=article_text,
    )
if __name__ == "__main__":
    # Script entry point: build the interface, then start serving it.
    demo = create_gradio_interface()
    demo.launch()