Spaces:

hackergeek98
/

tinyyy

Sleeping

hackergeek98 committed on Mar 24

Commit

918e357

verified ·

1 Parent(s): d10f84e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,6 +25,7 @@ pipe = pipeline(
 # Convert audio to WAV format
 def convert_to_wav(audio_path):
     """Re-encode the audio file at ``audio_path`` as WAV.

     The result is always written to ``converted_audio.wav`` (relative to
     the current working directory), overwriting any previous output.

     Returns:
         The path of the WAV file that was written.
     """
     output_path = "converted_audio.wav"
     segment = AudioSegment.from_file(audio_path)
     segment.export(output_path, format="wav")
     return output_path
@@ -42,9 +43,11 @@ def split_audio(audio_path, chunk_length_ms=30000):  # Default: 30 sec per chunk
     return chunk_paths
-# **🔹 Fixed: Read Audio Before Passing to Model**
 def transcribe_audio_chunk(chunk_path):
     """Transcribe a single audio chunk and return its text.

     Args:
         chunk_path: Path to an audio file that ``torchaudio.load`` can read.

     Returns:
         The ``"text"`` field of the result produced by ``pipe``.
     """
     # torchaudio.load yields a (channels, frames) tensor, even for mono input.
     waveform, sampling_rate = torchaudio.load(chunk_path)
     # Collapse the channel axis so the model receives a 1-D sample array:
     # averaging over dim 0 downmixes multi-channel audio and is a no-op
     # (apart from dropping the axis) for mono. Passing the 2-D tensor as
     # "raw" is rejected by the speech-recognition pipeline, which expects
     # single-channel 1-D input.
     samples = waveform.mean(dim=0).numpy()
     result = pipe({"raw": samples, "sampling_rate": sampling_rate})
     return result["text"]

 # Convert audio to WAV format
 def convert_to_wav(audio_path):
     """Re-encode the audio file at ``audio_path`` as a mono WAV file.

     The result is always written to ``converted_audio.wav`` (relative to
     the current working directory), overwriting any previous output.

     Returns:
         The path of the WAV file that was written.
     """
     output_path = "converted_audio.wav"
     # Downmix to a single channel before exporting.
     mono_segment = AudioSegment.from_file(audio_path).set_channels(1)
     mono_segment.export(output_path, format="wav")
     return output_path
     return chunk_paths
+# **🔹 Fixed: Convert Stereo to Mono Before Processing**
 def transcribe_audio_chunk(chunk_path):
     """Transcribe one audio chunk file and return the recognized text.

     Args:
         chunk_path: Path to an audio file readable by ``torchaudio.load``.

     Returns:
         The ``"text"`` entry of the result returned by ``pipe``.
     """
     # torchaudio.load returns a (channels, frames) tensor, even for mono files.
     waveform, sampling_rate = torchaudio.load(chunk_path)
     # Average over the channel axis WITHOUT keepdim: this downmixes stereo
     # and leaves mono samples unchanged, while also dropping the channel
     # axis. With keepdim=True the array stayed (1, frames), which the
     # speech-recognition pipeline rejects because it expects 1-D
     # single-channel "raw" input.
     mono = waveform.mean(dim=0).numpy()
     result = pipe({"raw": mono, "sampling_rate": sampling_rate})
     return result["text"]