Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -25,6 +25,7 @@ pipe = pipeline(
|
|
25 |
# Convert audio to WAV format
|
26 |
def convert_to_wav(audio_path):
    """Re-encode the audio file at ``audio_path`` as WAV.

    Args:
        audio_path: Path to an input audio file that
            ``AudioSegment.from_file`` can read.

    Returns:
        The path of the written WAV file. Every call writes to the same
        fixed relative path, ``"converted_audio.wav"``.
    """
    output_path = "converted_audio.wav"
    AudioSegment.from_file(audio_path).export(output_path, format="wav")
    return output_path
|
@@ -42,9 +43,11 @@ def split_audio(audio_path, chunk_length_ms=30000): # Default: 30 sec per chunk
|
|
42 |
|
43 |
return chunk_paths
|
44 |
|
45 |
-
# **🔹 Fixed:
|
46 |
def transcribe_audio_chunk(chunk_path):
    """Transcribe a single audio chunk and return the recognized text.

    Args:
        chunk_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        The ``"text"`` entry of the result returned by ``pipe``.
    """
    # torchaudio returns a channel-first 2-D tensor: (channels, frames).
    waveform, sampling_rate = torchaudio.load(chunk_path)
    # NOTE(review): `pipe` is presumably a Hugging Face speech-recognition
    # pipeline, which only accepts a 1-D single-channel array. Averaging over
    # the channel axis downmixes stereo and also drops the size-1 channel
    # axis that mono files carry, so the array is always 1-D.
    samples = waveform.mean(dim=0).numpy()
    result = pipe({"raw": samples, "sampling_rate": sampling_rate})
    return result["text"]
|
|
|
25 |
# Convert audio to WAV format
|
26 |
def convert_to_wav(audio_path):
    """Re-encode the audio file at ``audio_path`` as a single-channel WAV.

    Args:
        audio_path: Path to an input audio file that
            ``AudioSegment.from_file`` can read.

    Returns:
        The path of the written WAV file. Every call writes to the same
        fixed relative path, ``"converted_audio.wav"``.
    """
    output_path = "converted_audio.wav"
    # Downmix to one channel before exporting so later stages get mono audio.
    mono_segment = AudioSegment.from_file(audio_path).set_channels(1)
    mono_segment.export(output_path, format="wav")
    return output_path
|
|
|
43 |
|
44 |
return chunk_paths
|
45 |
|
46 |
+
# **🔹 Fixed: Convert Stereo to Mono Before Processing**
|
47 |
def transcribe_audio_chunk(chunk_path):
    """Transcribe a single audio chunk and return the recognized text.

    The waveform is always reduced to a 1-D mono array before it is handed
    to ``pipe``. The previous ``keepdim=True`` downmix left a leading channel
    axis of size 1 (as does any mono file), so the input was still 2-D.

    Args:
        chunk_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        The ``"text"`` entry of the result returned by ``pipe``.
    """
    # torchaudio returns a channel-first 2-D tensor: (channels, frames).
    waveform, sampling_rate = torchaudio.load(chunk_path)
    # NOTE(review): `pipe` is presumably a Hugging Face speech-recognition
    # pipeline, which rejects any input that is not a 1-D array. Reducing
    # over the channel axis WITHOUT keepdim averages stereo to mono and, for
    # mono input, simply removes the size-1 channel axis (values unchanged).
    mono_samples = waveform.mean(dim=0).numpy()
    result = pipe({"raw": mono_samples, "sampling_rate": sampling_rate})
    return result["text"]
|