Spaces:

lyimo
/

speech_separation

Runtime error

App Files Files Community

lyimo committed on Oct 29, 2024

Commit

a097442

verified ·

1 Parent(s): 3d8b9ae

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -124

app.py CHANGED Viewed

@@ -1,130 +1,36 @@
 import gradio as gr
-import torchaudio
 import torch
-import os
-from pydub import AudioSegment
-import tempfile
-from speechbrain.pretrained.separation import SepformerSeparation
-class AudioDenoiser:
-    def __init__(self):
-        # Initialize the SepFormer model for audio enhancement
-        self.model = SepformerSeparation.from_hparams(
-            source="speechbrain/sepformer-dns4-16k-enhancement",
             savedir='pretrained_models/sepformer-dns4-16k-enhancement'
-        )
-        # Create output directory if it doesn't exist
-        os.makedirs("enhanced_audio", exist_ok=True)
-    def convert_audio_to_wav(self, input_path):
-        """
-        Convert any audio format to WAV with proper settings
-        Args:
-            input_path (str): Path to input audio file
-        Returns:
-            str: Path to converted WAV file
-        """
-        try:
-            # Create a temporary file for the converted audio
-            temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
-            temp_wav_path = temp_wav.name
-            # Load audio using pydub (supports multiple formats)
-            audio = AudioSegment.from_file(input_path)
-            # Convert to mono if stereo
-            if audio.channels > 1:
-                audio = audio.set_channels(1)
-            # Export as WAV with proper settings
-            audio.export(
-                temp_wav_path,
-                format='wav',
-                parameters=[
-                    '-ar', '16000',  # Set sample rate to 16kHz
-                    '-ac', '1'       # Set channels to mono
-                ]
-            )
-            return temp_wav_path
-        except Exception as e:
-            raise gr.Error(f"Error converting audio format: {str(e)}")
-    def enhance_audio(self, audio_path):
-        """
-        Process the input audio file and return the enhanced version
-        Args:
-            audio_path (str): Path to the input audio file
-        Returns:
-            str: Path to the enhanced audio file
-        """
-        try:
-            # Convert input audio to proper WAV format
-            wav_path = self.convert_audio_to_wav(audio_path)
-            # Separate and enhance the audio
-            est_sources = self.model.separate_file(path=wav_path)
-            # Generate output filename
-            output_path = os.path.join("enhanced_audio", "enhanced_audio.wav")
-            # Save the enhanced audio
-            torchaudio.save(
-                output_path,
-                est_sources[:, :, 0].detach().cpu(),
-                16000  # Sample rate
-            )
-            # Clean up temporary file
-            os.unlink(wav_path)
-            return output_path
-        except Exception as e:
-            raise gr.Error(f"Error processing audio: {str(e)}")
-def create_gradio_interface():
-    # Initialize the denoiser
-    denoiser = AudioDenoiser()
-    # Create the Gradio interface
-    interface = gr.Interface(
-        fn=denoiser.enhance_audio,
-        inputs=gr.Audio(
-            type="filepath",
-            label="Upload Noisy Audio"
-        ),
-        outputs=gr.Audio(
-            label="Enhanced Audio",
-            type="filepath"
-        ),
-        title="Audio Denoising using SepFormer",
-        description="""
-        This application uses the SepFormer model from SpeechBrain to enhance audio quality
-        by removing background noise. Supports various audio formats including MP3 and WAV.
-        """,
-        article="""
-        Supported audio formats:
-        - MP3
-        - WAV
-        - OGG
-        - FLAC
-        - M4A
-        and more...
-        The audio will automatically be converted to the correct format for processing.
-        """
-    )
-    return interface
-if __name__ == "__main__":
-    # Create and launch the interface
-    demo = create_gradio_interface()
-    demo.launch()

 import gradio as gr
 import torch
+import torchaudio
+from speechbrain.inference.enhancement import SpectralMaskEnhancement
+from speechbrain.inference.separation import SepformerSeparation
+# Load the enhancement model once at startup. The checkpoint named below is a
+# SepFormer model, so it is loaded through SepformerSeparation (the same
+# loader the previous version of this app used), not SpectralMaskEnhancement.
+model = SepformerSeparation.from_hparams(
+    source="speechbrain/sepformer-dns4-16k-enhancement",
             savedir='pretrained_models/sepformer-dns4-16k-enhancement'
+)
+# Define the enhancement function
+def enhance_audio(noisy_audio):
+    """Enhance one audio file with the SepFormer model and return the result path.
+
+    Args:
+        noisy_audio: Path to the input audio file. The Gradio input component
+            is configured with type="filepath", so a path string is expected.
+
+    Returns:
+        Path to the written WAV file ("enhanced.wav", overwritten on each call).
+
+    Raises:
+        gr.Error: If no audio file was provided.
+    """
+    # Gradio passes None when the form is submitted without an upload
+    if noisy_audio is None:
+        raise gr.Error("Please upload an audio file to enhance.")
+    # Run the model directly on the file; the result is indexed below as a
+    # rank-3 tensor whose last axis selects the estimated source
+    est_sources = model.separate_file(path=noisy_audio)
+    # Save the first (only) estimated source at the model's 16 kHz rate
+    enhanced_path = "enhanced.wav"
+    torchaudio.save(enhanced_path, est_sources[:, :, 0].detach().cpu(), 16000)
+    return enhanced_path
+# Wire enhance_audio into a Gradio UI: one audio upload in, one audio file out.
+# Both components exchange file paths rather than raw sample arrays.
+interface = gr.Interface(
+    title="Speech Enhancement App",
+    description="Upload a noisy audio file to enhance the quality. The enhanced audio can be downloaded after processing.",
+    fn=enhance_audio,
+    inputs=gr.Audio(
+        type="filepath",
+        label="Upload Noisy Audio",
+    ),
+    outputs=gr.Audio(
+        type="filepath",
+        label="Enhanced Audio",
+    ),
+)
+# Start serving the app
+interface.launch()