# Source: Hugging Face Space file view (page residue kept as comments)
# lyimo's picture
# Update app.py
# 1594fe6 verified
# raw
# history blame
# 1.3 kB
import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement
from speechbrain.inference.separation import SepformerSeparation as separator
from IPython.display import Audio
# Fetch the pretrained SepFormer checkpoint once at import time; the weights
# are stored under `savedir`.
model = separator.from_hparams(
    source="speechbrain/sepformer-dns4-16k-enhancement",
    savedir="pretrained_models/sepformer-dns4-16k-enhancement",
)
def enhance_audio(noisy_audio):
    """Denoise an audio file with the module-level SepFormer ``model``.

    Args:
        noisy_audio: Path to the noisy recording, as delivered by the Gradio
            ``Audio`` input configured with ``type="filepath"``.

    Returns:
        Path of the WAV file (written at 16 kHz) holding the enhanced audio.

    Raises:
        gr.Error: If no audio file was supplied.
    """
    if not noisy_audio:
        # Gradio hands over None when the form is submitted without a file.
        raise gr.Error("Please upload an audio file first.")

    # Load the waveform through the model's own loader and add a batch
    # dimension (presumably yields a 1-D tensor at the model's sample rate —
    # confirm against the SpeechBrain `Pretrained.load_audio` docs).
    noisy = model.load_audio(noisy_audio).unsqueeze(0)

    # SepformerSeparation exposes `separate_batch`, not `enhance_batch`;
    # gradients are not needed for inference.
    with torch.no_grad():
        est_sources = model.separate_batch(noisy)

    # Output is [batch, time, n_sources]; the enhancement model's cleaned
    # speech is source 0, giving a [1, time] tensor suitable for saving.
    enhanced = est_sources[:, :, 0].detach().cpu()

    # NOTE(review): a fixed output name means concurrent requests overwrite
    # each other's file; kept as-is to preserve the existing behaviour.
    enhanced_path = "enhanced.wav"
    torchaudio.save(enhanced_path, enhanced, 16000)
    return enhanced_path
# Build the web UI: one audio upload in, one downloadable audio clip out.
noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
enhanced_output = gr.Audio(type="filepath", label="Enhanced Audio")

interface = gr.Interface(
    fn=enhance_audio,
    inputs=noisy_input,
    outputs=enhanced_output,
    title="Speech Enhancement App",
    description="Upload a noisy audio file to enhance the quality. The enhanced audio can be downloaded after processing.",
)

# Start serving the app.
interface.launch()