Spaces:

Kr08
/

ASR_gradio

Build error

Kr08 committed on Aug 27, 2024

Commit

3e497df

verified ·

1 Parent(s): 3e7e003

Update audio_processing.py

Files changed (1) hide show

audio_processing.py CHANGED Viewed

@@ -10,13 +10,27 @@ from model_utils import get_processor, get_model, get_whisper_model_small, get_d
 from config import SAMPLING_RATE, CHUNK_LENGTH_S
-def resample_with_ffmpeg(input_file, output_file, target_sr=16000):
-    command = [
-        'ffmpeg', '-i', input_file, '-ar', str(target_sr), output_file
-    ]
-    subprocess.run(command, check=True)
 @spaces.GPU
 def detect_language(audio):
     whisper_model = get_whisper_model_small()

 from config import SAMPLING_RATE, CHUNK_LENGTH_S
+# def resample_with_ffmpeg(input_file, output_file, target_sr=16000):
+#     command = [
+#         'ffmpeg', '-i', input_file, '-ar', str(target_sr), output_file
+#     ]
+#     subprocess.run(command, check=True)
+@spaces.GPU  # NOTE(review): nothing in this body visibly moves data to a GPU — confirm the decorator is needed
+def load_and_resample_audio(file):  # Load `file` via torchaudio and return (waveform, SAMPLING_RATE)
+    waveform, sample_rate = torchaudio.load(file)  # sample_rate is the rate reported for the loaded file
+    if sample_rate != SAMPLING_RATE:  # resample only when the file's rate differs from the configured rate
+        waveform = F.resample(waveform, sample_rate, SAMPLING_RATE)  # F is presumably torchaudio.functional — import not visible here
+    # Down-mix to mono: when dim 0 holds more than one entry, average over it (assumes dim 0 is the channel axis — TODO confirm)
+    if waveform.dim() > 1 and waveform.shape[0] > 1:
+        waveform = waveform.mean(dim=0, keepdim=True)  # keepdim=True keeps the leading dimension (size 1) after averaging
+    return waveform, SAMPLING_RATE  # the returned rate is always the configured constant, not the file's original rate
 @spaces.GPU
 def detect_language(audio):
     whisper_model = get_whisper_model_small()