Spaces:

lyimo
/

speech_separation

Runtime error

App Files Files Community

lyimo committed on Oct 28, 2024

Commit

5d191e9

verified ·

1 Parent(s): 8bb1d29

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -6

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ from speechbrain.pretrained import SepformerSeparation as separator
 import torchaudio
 import torch
 import os
 class AudioDenoiser:
     def __init__(self):
@@ -15,6 +17,43 @@ class AudioDenoiser:
         # Create output directory if it doesn't exist
         os.makedirs("enhanced_audio", exist_ok=True)
     def enhance_audio(self, audio_path):
         """
         Process the input audio file and return the enhanced version
@@ -26,8 +65,11 @@ class AudioDenoiser:
             str: Path to the enhanced audio file
         """
         try:
             # Separate and enhance the audio
-            est_sources = self.model.separate_file(path=audio_path)
             # Generate output filename
             output_path = os.path.join("enhanced_audio", "enhanced_audio.wav")
@@ -39,6 +81,9 @@ class AudioDenoiser:
                 16000  # Sample rate
             )
             return output_path
         except Exception as e:
@@ -53,19 +98,28 @@ def create_gradio_interface():
         fn=denoiser.enhance_audio,
         inputs=gr.Audio(
             type="filepath",
-            label="Upload Noisy Audio"
         ),
         outputs=gr.Audio(
-            label="Enhanced Audio"
         ),
         title="Audio Denoising using SepFormer",
         description="""
         This application uses the SepFormer model from SpeechBrain to enhance audio quality
-        by removing background noise. Upload any noisy audio file to get started.
         """,
         article="""
-        This application uses the SepFormer model trained on the DNS4 dataset.
-        For more information, visit the [SpeechBrain documentation](https://speechbrain.github.io/).
         """
     )

 import torchaudio
 import torch
 import os
+from pydub import AudioSegment
+import tempfile
 class AudioDenoiser:
     def __init__(self):
         # Create output directory if it doesn't exist
         os.makedirs("enhanced_audio", exist_ok=True)
+    def convert_audio_to_wav(self, input_path):
+        """
+        Convert any audio format to WAV with proper settings
+        Args:
+            input_path (str): Path to input audio file
+        Returns:
+            str: Path to converted WAV file
+        """
+        try:
+            # Create a temporary file for the converted audio
+            temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+            temp_wav_path = temp_wav.name
+            # Load audio using pydub (supports multiple formats)
+            audio = AudioSegment.from_file(input_path)
+            # Convert to mono if stereo
+            if audio.channels > 1:
+                audio = audio.set_channels(1)
+            # Export as WAV with proper settings
+            audio.export(
+                temp_wav_path,
+                format='wav',
+                parameters=[
+                    '-ar', '16000',  # Set sample rate to 16kHz
+                    '-ac', '1'       # Set channels to mono
+                ]
+            )
+            return temp_wav_path
+        except Exception as e:
+            raise gr.Error(f"Error converting audio format: {str(e)}")
     def enhance_audio(self, audio_path):
         """
         Process the input audio file and return the enhanced version
             str: Path to the enhanced audio file
         """
         try:
+            # Convert input audio to proper WAV format
+            wav_path = self.convert_audio_to_wav(audio_path)
             # Separate and enhance the audio
+            est_sources = self.model.separate_file(path=wav_path)
             # Generate output filename
             output_path = os.path.join("enhanced_audio", "enhanced_audio.wav")
                 16000  # Sample rate
             )
+            # Clean up temporary file
+            os.unlink(wav_path)
             return output_path
         except Exception as e:
         fn=denoiser.enhance_audio,
         inputs=gr.Audio(
             type="filepath",
+            label="Upload Noisy Audio",
+            source="upload"
         ),
         outputs=gr.Audio(
+            label="Enhanced Audio",
+            type="filepath"
         ),
         title="Audio Denoising using SepFormer",
         description="""
         This application uses the SepFormer model from SpeechBrain to enhance audio quality
+        by removing background noise. Supports various audio formats including MP3 and WAV.
         """,
         article="""
+        Supported audio formats:
+        - MP3
+        - WAV
+        - OGG
+        - FLAC
+        - M4A
+        and more...
+        The audio will automatically be converted to the correct format for processing.
         """
     )