Kr08 committed on
Commit
3e497df
·
verified ·
1 Parent(s): 3e7e003

Update audio_processing.py

Browse files
Files changed (1) hide show
  1. audio_processing.py +19 -5
audio_processing.py CHANGED
@@ -10,13 +10,27 @@ from model_utils import get_processor, get_model, get_whisper_model_small, get_d
10
  from config import SAMPLING_RATE, CHUNK_LENGTH_S
11
 
12
 
13
- def resample_with_ffmpeg(input_file, output_file, target_sr=16000):
14
- command = [
15
- 'ffmpeg', '-i', input_file, '-ar', str(target_sr), output_file
16
- ]
17
- subprocess.run(command, check=True)
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  @spaces.GPU
21
  def detect_language(audio):
22
  whisper_model = get_whisper_model_small()
 
10
  from config import SAMPLING_RATE, CHUNK_LENGTH_S
11
 
12
 
13
+ # def resample_with_ffmpeg(input_file, output_file, target_sr=16000):
14
+ # command = [
15
+ # 'ffmpeg', '-i', input_file, '-ar', str(target_sr), output_file
16
+ # ]
17
+ # subprocess.run(command, check=True)
18
 
19
 
20
# NOTE(review): nothing in this function moves data to a GPU; the decorator is
# kept to preserve existing behavior, but confirm it is actually needed here.
@spaces.GPU
def load_and_resample_audio(file):
    """Load an audio file and return it as mono audio at ``SAMPLING_RATE``.

    Args:
        file: Whatever ``torchaudio.load`` accepts as its source argument.

    Returns:
        A ``(waveform, SAMPLING_RATE)`` tuple. Multi-channel input is averaged
        down to a single channel (the channel dimension is kept), and the
        waveform is resampled when the file's rate differs from
        ``SAMPLING_RATE``.
    """
    waveform, sample_rate = torchaudio.load(file)

    # Downmix before resampling: both steps are linear, so the result matches
    # resample-then-average up to float rounding, but the resampler only has
    # to process one channel instead of every channel in the file.
    if waveform.dim() > 1 and waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # `F` is presumably `torchaudio.functional` — confirm against the imports.
    if sample_rate != SAMPLING_RATE:
        waveform = F.resample(waveform, sample_rate, SAMPLING_RATE)

    return waveform, SAMPLING_RATE
32
+
33
+
34
  @spaces.GPU
35
  def detect_language(audio):
36
  whisper_model = get_whisper_model_small()