Spaces:

lyimo
/

speech_separation

Runtime error

File size: 1,391 Bytes

8bb1d29
 
a097442
1594fe6
518eabe
8bb1d29
a097442
 
 
251f2af
a097442
 
 
 
518eabe
 
 
 
a097442
518eabe
a097442
 
1e246dd
a097442
518eabe
a097442
 
0720448
518eabe
 
 
a097442
8bb1d29
a097442
 
 
 
 
 
 
 
8bb1d29
1e246dd

import os
import tempfile

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.separation import SepformerSeparation as separator

# Instantiate the pretrained SepFormer enhancement checkpoint once at import
# time so every request reuses the same loaded model.
_MODEL_SOURCE = "speechbrain/sepformer-dns4-16k-enhancement"
_MODEL_CACHE_DIR = "pretrained_models/sepformer-dns4-16k-enhancement"

model = separator.from_hparams(source=_MODEL_SOURCE, savedir=_MODEL_CACHE_DIR)

# Define the enhancement function
def enhance_audio(noisy_audio):
    """Denoise an uploaded recording with the pretrained SepFormer model.

    Args:
        noisy_audio: Filesystem path of the uploaded audio file, as supplied
            by the ``gr.Audio(type="filepath")`` input component.

    Returns:
        Path of a newly created WAV file containing the enhanced audio.

    Raises:
        ValueError: If no audio file was provided (``None`` or empty path).
    """
    if not noisy_audio:
        raise ValueError(
            "No audio file was provided; please upload a recording first."
        )

    # Use a unique scratch file per call: a fixed name in the working
    # directory would be overwritten by concurrent requests.
    scratch_fd, wav_audio = tempfile.mkstemp(suffix=".wav")
    os.close(scratch_fd)
    try:
        # Convert the upload (e.g. MP3) to WAV
        waveform, sample_rate = torchaudio.load(noisy_audio)
        torchaudio.save(wav_audio, waveform, sample_rate)

        # Load and add a batch dimension to the audio tensor
        noisy = model.load_audio(wav_audio).unsqueeze(0)
    finally:
        # Always remove the scratch file, even when loading fails.
        os.remove(wav_audio)

    # SepformerSeparation exposes separate_batch (there is no enhance_batch);
    # per the model card, the output is indexed [batch, time, source] and the
    # enhancement checkpoint's single estimate is source 0.
    est_sources = model.separate_batch(noisy)
    enhanced = est_sources[:, :, 0].detach().cpu()

    # Save enhanced audio to its own unique file so simultaneous users never
    # receive each other's results. The checkpoint operates at 16 kHz.
    out_fd, enhanced_path = tempfile.mkstemp(prefix="enhanced_", suffix=".wav")
    os.close(out_fd)
    torchaudio.save(enhanced_path, enhanced, 16000)

    return enhanced_path

# Build the UI: one audio upload in, one audio player/download out.
noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
enhanced_output = gr.Audio(type="filepath", label="Enhanced Audio")

interface = gr.Interface(
    fn=enhance_audio,
    inputs=noisy_input,
    outputs=enhanced_output,
    title="Speech Enhancement App",
    description=(
        "Upload a noisy audio file to enhance the quality. "
        "The enhanced audio can be downloaded after processing."
    ),
)

# Start serving the app, requesting a public share link.
interface.launch(share=True)