Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,6 +25,7 @@ pipe = pipeline(
|
|
| 25 |
# Convert audio to WAV format
|
| 26 |
def convert_to_wav(audio_path):
    """Re-encode an audio file as WAV and return the path of the WAV file.

    Args:
        audio_path: Location of the input audio, handed straight to
            ``AudioSegment.from_file``.

    Returns:
        The string ``"converted_audio.wav"``, the path the WAV data was
        exported to.
    """
    # NOTE(review): every call exports to the same fixed relative path, so
    # successive calls presumably replace the previous output - confirm that
    # callers never need two converted files at once.
    destination = "converted_audio.wav"
    AudioSegment.from_file(audio_path).export(destination, format="wav")
    return destination
|
|
@@ -42,9 +43,11 @@ def split_audio(audio_path, chunk_length_ms=30000): # Default: 30 sec per chunk
|
|
| 42 |
|
| 43 |
return chunk_paths
|
| 44 |
|
| 45 |
-
# **🔹 Fixed:
|
| 46 |
def transcribe_audio_chunk(chunk_path):
    """Transcribe a single audio chunk and return the recognized text.

    The chunk is loaded with ``torchaudio.load`` and its samples are passed to
    the module-level ``pipe`` as raw data together with the sampling rate.
    ``pipe`` is built outside this block; it is presumably a transformers
    speech-recognition pipeline.

    Args:
        chunk_path: Path of the audio chunk file to load.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    waveform, sampling_rate = torchaudio.load(chunk_path)

    # torchaudio.load returns a (channels, frames) tensor, while the
    # speech-recognition pipeline only accepts a one-dimensional,
    # single-channel array. Averaging over the channel axis downmixes
    # multi-channel audio and drops that axis; for a mono file it simply
    # removes the leading dimension of size one.
    mono_samples = waveform.mean(dim=0).numpy()

    result = pipe({"raw": mono_samples, "sampling_rate": sampling_rate})
    return result["text"]
|
|
|
|
| 25 |
# Convert audio to WAV format
|
| 26 |
def convert_to_wav(audio_path):
    """Re-encode an audio file as single-channel WAV and return its path.

    Args:
        audio_path: Location of the input audio, handed straight to
            ``AudioSegment.from_file``.

    Returns:
        The string ``"converted_audio.wav"``, the path the WAV data was
        exported to.
    """
    # NOTE(review): every call exports to the same fixed relative path, so
    # successive calls presumably replace the previous output - confirm that
    # callers never need two converted files at once.
    destination = "converted_audio.wav"

    # Force one channel before exporting so the written file is mono.
    mono_audio = AudioSegment.from_file(audio_path).set_channels(1)
    mono_audio.export(destination, format="wav")
    return destination
|
|
|
|
| 43 |
|
| 44 |
return chunk_paths
|
| 45 |
|
| 46 |
+
# **🔹 Fixed: Convert Stereo to Mono Before Processing**
|
| 47 |
def transcribe_audio_chunk(chunk_path):
    """Transcribe a single audio chunk and return the recognized text.

    The chunk is loaded with ``torchaudio.load``, reduced to a one-dimensional
    mono signal, and passed to the module-level ``pipe`` as raw data together
    with the sampling rate. ``pipe`` is built outside this block; it is
    presumably a transformers speech-recognition pipeline.

    Args:
        chunk_path: Path of the audio chunk file to load.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    waveform, sampling_rate = torchaudio.load(chunk_path)

    # The loaded tensor is (channels, frames). Averaging over the channel axis
    # WITHOUT keepdim downmixes stereo and also drops the axis, so the result
    # is 1-D for both stereo and mono input. Keeping the dimension would still
    # hand the pipeline a (1, frames) array, which it rejects as not being a
    # single-channel input.
    mono_samples = waveform.mean(dim=0).numpy()

    result = pipe({"raw": mono_samples, "sampling_rate": sampling_rate})
    return result["text"]
|