hackergeek98 committed on
Commit 918e357 · verified · 1 Parent(s): d10f84e

Update app.py

Files changed (1)
  app.py  +4 -1
app.py CHANGED
@@ -25,6 +25,7 @@ pipe = pipeline(
 # Convert audio to WAV format
 def convert_to_wav(audio_path):
     audio = AudioSegment.from_file(audio_path)
+    audio = audio.set_channels(1)  # Ensure mono audio
     wav_path = "converted_audio.wav"
     audio.export(wav_path, format="wav")
     return wav_path
@@ -42,9 +43,11 @@ def split_audio(audio_path, chunk_length_ms=30000): # Default: 30 sec per chunk
 
     return chunk_paths
 
-# **🔹 Fixed: Read Audio Before Passing to Model**
+# **🔹 Fixed: Convert Stereo to Mono Before Processing**
 def transcribe_audio_chunk(chunk_path):
     waveform, sampling_rate = torchaudio.load(chunk_path)  # Load audio
+    if waveform.shape[0] > 1:  # If stereo (more than 1 channel)
+        waveform = torch.mean(waveform, dim=0, keepdim=True)  # Convert to mono
     waveform = waveform.numpy()  # Convert to numpy
     result = pipe({"raw": waveform, "sampling_rate": sampling_rate})  # Pass raw data
     return result["text"]
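
For context, here is a minimal, self-contained sketch of the two changed functions as they read after this commit. The imports, the placeholder Whisper checkpoint, and the extra .squeeze(0) call are assumptions added here for illustration and are not part of the diff; the sketch also assumes app.py imports torch at the top, which the new torch.mean call requires.

import torch
import torchaudio
from pydub import AudioSegment
from transformers import pipeline

# Placeholder checkpoint: the actual model loaded in app.py is not visible in this diff.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# Convert any input file to a mono WAV, mirroring the change in the first hunk.
def convert_to_wav(audio_path):
    audio = AudioSegment.from_file(audio_path)
    audio = audio.set_channels(1)  # Ensure mono audio
    wav_path = "converted_audio.wav"
    audio.export(wav_path, format="wav")
    return wav_path

# Transcribe one chunk, downmixing stereo to mono first (second hunk).
def transcribe_audio_chunk(chunk_path):
    waveform, sampling_rate = torchaudio.load(chunk_path)  # shape: (channels, samples)
    if waveform.shape[0] > 1:  # more than one channel -> average them to mono
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    # .squeeze(0) added in this sketch (not in the diff): the HF ASR pipeline expects a 1-D array.
    waveform = waveform.squeeze(0).numpy()
    result = pipe({"raw": waveform, "sampling_rate": sampling_rate})
    return result["text"]

A quick sanity check, assuming some local file sample.mp3: print(transcribe_audio_chunk(convert_to_wav("sample.mp3"))).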