Mohssinibra committed
Commit e5c4db0 · verified · 1 Parent(s): 1053c8b
Files changed (1)
app.py  +15 -5
app.py CHANGED
@@ -27,22 +27,31 @@ def remove_phone_tonalities(audio, sr):
     filtered_audio = signal.filtfilt(b, a, audio)
     return filtered_audio
 
+def convert_audio_to_wav(audio_path):
+    # Convert any audio format to WAV using pydub
+    sound = AudioSegment.from_file(audio_path)
+    wav_path = "converted_audio.wav"
+    sound.export(wav_path, format="wav")
+    return wav_path
+
 def process_audio(audio_path):
     print(f"Received audio file: {audio_path}")
 
     try:
+        # Convert the input audio to WAV format
+        wav_path = convert_audio_to_wav(audio_path)
+        print(f"Audio converted to WAV: {wav_path}")
+
         # Load the audio file using librosa
-        audio, sr = librosa.load(audio_path, sr=None, duration=30)
+        audio, sr = librosa.load(wav_path, sr=None, duration=30)
         print(f"Audio loaded: {len(audio)} samples at {sr} Hz")
 
         # Remove phone tonalities (if any)
         audio = remove_phone_tonalities(audio, sr)
         print("Phone tonalities removed")
 
-        # Convert to AudioSegment for silence detection
-        sound = AudioSegment.from_wav(audio_path)
-
         # Silence detection: split based on silence
+        sound = AudioSegment.from_wav(wav_path)
         min_silence_len = 1000  # minimum silence length in ms
         silence_thresh = sound.dBFS - 14  # threshold for silence (adjust as needed)
         non_silent_chunks = [
@@ -50,7 +59,7 @@ def process_audio(audio_path):
         ]
 
         # Apply diarization (WhisperX)
-        diarization = diarize_model(audio_path)
+        diarization = diarize_model(wav_path)
 
         transcriptions = []
         for chunk in non_silent_chunks:
@@ -71,6 +80,7 @@
 
         # Clean up temporary files
         os.remove("chunk.wav")
+        os.remove(wav_path)  # Remove converted wav file
 
         return "\n".join(transcriptions)
 
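The new convert_audio_to_wav helper relies on pydub, which in turn calls out to ffmpeg (or libav) to decode anything that is not already WAV, so that backend has to be available at runtime. Converting up front means the later AudioSegment.from_wav and diarize_model calls all read the same WAV file regardless of the original upload format. A minimal standalone sketch of the same conversion step, assuming pydub and ffmpeg are installed and using a hypothetical input.mp3 as the source file:

from pydub import AudioSegment

# Same conversion as the committed helper: decode any input format, re-encode as WAV
sound = AudioSegment.from_file("input.mp3")        # ffmpeg handles the MP3 decode
sound.export("converted_audio.wav", format="wav")  # fixed output path, as in the commit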
 
 
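The first hunk ends just as non_silent_chunks opens, so the body of that comprehension is not visible in this diff. For context only, chunking like this is commonly written with pydub.silence.detect_nonsilent, reusing the same min_silence_len and silence_thresh values; the sketch below is an assumption about that shape, not the code actually in app.py:

from pydub import AudioSegment
from pydub.silence import detect_nonsilent

sound = AudioSegment.from_wav("converted_audio.wav")
min_silence_len = 1000             # minimum silence length in ms
silence_thresh = sound.dBFS - 14   # threshold relative to the clip's average loudness

# detect_nonsilent returns [start_ms, end_ms] pairs for the voiced regions;
# slicing an AudioSegment by milliseconds extracts the matching chunk.
non_silent_chunks = [
    sound[start:end]
    for start, end in detect_nonsilent(
        sound, min_silence_len=min_silence_len, silence_thresh=silence_thresh
    )
]

for chunk in non_silent_chunks:
    chunk.export("chunk.wav", format="wav")  # temporary file, removed later in process_audio

Each chunk here overwrites the same chunk.wav, which is consistent with the single os.remove("chunk.wav") in the cleanup step of the commit.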