lyimo committed on
Commit
5d191e9
·
verified ·
1 Parent(s): 8bb1d29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -6
app.py CHANGED
@@ -3,6 +3,8 @@ from speechbrain.pretrained import SepformerSeparation as separator
3
  import torchaudio
4
  import torch
5
  import os
 
 
6
 
7
  class AudioDenoiser:
8
  def __init__(self):
@@ -15,6 +17,43 @@ class AudioDenoiser:
15
  # Create output directory if it doesn't exist
16
  os.makedirs("enhanced_audio", exist_ok=True)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def enhance_audio(self, audio_path):
19
  """
20
  Process the input audio file and return the enhanced version
@@ -26,8 +65,11 @@ class AudioDenoiser:
26
  str: Path to the enhanced audio file
27
  """
28
  try:
 
 
 
29
  # Separate and enhance the audio
30
- est_sources = self.model.separate_file(path=audio_path)
31
 
32
  # Generate output filename
33
  output_path = os.path.join("enhanced_audio", "enhanced_audio.wav")
@@ -39,6 +81,9 @@ class AudioDenoiser:
39
  16000 # Sample rate
40
  )
41
 
 
 
 
42
  return output_path
43
 
44
  except Exception as e:
@@ -53,19 +98,28 @@ def create_gradio_interface():
53
  fn=denoiser.enhance_audio,
54
  inputs=gr.Audio(
55
  type="filepath",
56
- label="Upload Noisy Audio"
 
57
  ),
58
  outputs=gr.Audio(
59
- label="Enhanced Audio"
 
60
  ),
61
  title="Audio Denoising using SepFormer",
62
  description="""
63
  This application uses the SepFormer model from SpeechBrain to enhance audio quality
64
- by removing background noise. Upload any noisy audio file to get started.
65
  """,
66
  article="""
67
- This application uses the SepFormer model trained on the DNS4 dataset.
68
- For more information, visit the [SpeechBrain documentation](https://speechbrain.github.io/).
 
 
 
 
 
 
 
69
  """
70
  )
71
 
 
3
  import torchaudio
4
  import torch
5
  import os
6
+ from pydub import AudioSegment
7
+ import tempfile
8
 
9
  class AudioDenoiser:
10
  def __init__(self):
 
17
  # Create output directory if it doesn't exist
18
  os.makedirs("enhanced_audio", exist_ok=True)
19
 
20
+ def convert_audio_to_wav(self, input_path):
21
+ """
22
+ Convert any audio format to WAV with proper settings
23
+
24
+ Args:
25
+ input_path (str): Path to input audio file
26
+
27
+ Returns:
28
+ str: Path to converted WAV file
29
+ """
30
+ try:
31
+ # Create a temporary file for the converted audio
32
+ temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
33
+ temp_wav_path = temp_wav.name
34
+
35
+ # Load audio using pydub (supports multiple formats)
36
+ audio = AudioSegment.from_file(input_path)
37
+
38
+ # Convert to mono if stereo
39
+ if audio.channels > 1:
40
+ audio = audio.set_channels(1)
41
+
42
+ # Export as WAV with proper settings
43
+ audio.export(
44
+ temp_wav_path,
45
+ format='wav',
46
+ parameters=[
47
+ '-ar', '16000', # Set sample rate to 16kHz
48
+ '-ac', '1' # Set channels to mono
49
+ ]
50
+ )
51
+
52
+ return temp_wav_path
53
+
54
+ except Exception as e:
55
+ raise gr.Error(f"Error converting audio format: {str(e)}")
56
+
57
  def enhance_audio(self, audio_path):
58
  """
59
  Process the input audio file and return the enhanced version
 
65
  str: Path to the enhanced audio file
66
  """
67
  try:
68
+ # Convert input audio to proper WAV format
69
+ wav_path = self.convert_audio_to_wav(audio_path)
70
+
71
  # Separate and enhance the audio
72
+ est_sources = self.model.separate_file(path=wav_path)
73
 
74
  # Generate output filename
75
  output_path = os.path.join("enhanced_audio", "enhanced_audio.wav")
 
81
  16000 # Sample rate
82
  )
83
 
84
+ # Clean up temporary file
85
+ os.unlink(wav_path)
86
+
87
  return output_path
88
 
89
  except Exception as e:
 
98
  fn=denoiser.enhance_audio,
99
  inputs=gr.Audio(
100
  type="filepath",
101
+ label="Upload Noisy Audio",
102
+ source="upload"
103
  ),
104
  outputs=gr.Audio(
105
+ label="Enhanced Audio",
106
+ type="filepath"
107
  ),
108
  title="Audio Denoising using SepFormer",
109
  description="""
110
  This application uses the SepFormer model from SpeechBrain to enhance audio quality
111
+ by removing background noise. Supports various audio formats including MP3 and WAV.
112
  """,
113
  article="""
114
+ Supported audio formats:
115
+ - MP3
116
+ - WAV
117
+ - OGG
118
+ - FLAC
119
+ - M4A
120
+ and more...
121
+
122
+ The audio will automatically be converted to the correct format for processing.
123
  """
124
  )
125