app.py CHANGED
@@ -27,22 +27,31 @@ def remove_phone_tonalities(audio, sr):
     filtered_audio = signal.filtfilt(b, a, audio)
     return filtered_audio
 
+def convert_audio_to_wav(audio_path):
+    # Convert any audio format to WAV using pydub
+    sound = AudioSegment.from_file(audio_path)
+    wav_path = "converted_audio.wav"
+    sound.export(wav_path, format="wav")
+    return wav_path
+
 def process_audio(audio_path):
     print(f"Received audio file: {audio_path}")
 
     try:
+        # Convert the input audio to WAV format
+        wav_path = convert_audio_to_wav(audio_path)
+        print(f"Audio converted to WAV: {wav_path}")
+
         # Load the audio file using librosa
-        audio, sr = librosa.load(
+        audio, sr = librosa.load(wav_path, sr=None, duration=30)
         print(f"Audio loaded: {len(audio)} samples at {sr} Hz")
 
         # Remove phone tonalities (if any)
         audio = remove_phone_tonalities(audio, sr)
         print("Phone tonalities removed")
 
-        # Convert to AudioSegment for silence detection
-        sound = AudioSegment.from_wav(audio_path)
-
         # Silence detection: split based on silence
+        sound = AudioSegment.from_wav(wav_path)
         min_silence_len = 1000  # minimum silence length in ms
         silence_thresh = sound.dBFS - 14  # threshold for silence (adjust as needed)
         non_silent_chunks = [
@@ -50,7 +59,7 @@ def process_audio(audio_path):
         ]
 
         # Apply diarization (WhisperX)
-        diarization = diarize_model(
+        diarization = diarize_model(wav_path)
 
         transcriptions = []
         for chunk in non_silent_chunks:
@@ -71,6 +80,7 @@ def process_audio(audio_path):
 
         # Clean up temporary files
         os.remove("chunk.wav")
+        os.remove(wav_path)  # Remove converted wav file
 
         return "\n".join(transcriptions)
 
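For context on the unchanged filtering code at the top of the first hunk: the diff shows only the signal.filtfilt call, not how the coefficients b and a are designed. A minimal sketch of one plausible design, assuming a narrow scipy.signal.iirnotch band-stop around a single telephone tone; the actual filter type and frequencies in app.py are not part of this diff.

from scipy import signal

def remove_phone_tonalities(audio, sr):
    # Hypothetical filter design: a narrow notch near 440 Hz (a common dial-tone
    # component), used here purely as an illustration; the real coefficients are
    # defined earlier in app.py and are not shown in this diff.
    b, a = signal.iirnotch(w0=440.0, Q=30.0, fs=sr)
    # filtfilt runs the filter forward and backward, so the output has no phase shift.
    filtered_audio = signal.filtfilt(b, a, audio)
    return filtered_audio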
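The body of non_silent_chunks is truncated in both hunks. One common way such a list is built with pydub, sketched under the assumption that detect_nonsilent is used together with the min_silence_len and silence_thresh values shown above; the actual expression in app.py may differ.

from pydub import AudioSegment
from pydub.silence import detect_nonsilent

sound = AudioSegment.from_wav("converted_audio.wav")
min_silence_len = 1000            # minimum silence length in ms
silence_thresh = sound.dBFS - 14  # threshold for silence

# Slice each detected non-silent region out of the recording.
non_silent_chunks = [
    sound[start_ms:end_ms]
    for start_ms, end_ms in detect_nonsilent(
        sound,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )
]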
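diarize_model is only called in this diff, never constructed. A plausible WhisperX setup, given as an assumption rather than the app's actual initialization code; the token and device below are placeholders.

import whisperx

device = "cpu"  # "cuda" if a GPU is available in the Space
diarize_model = whisperx.DiarizationPipeline(
    use_auth_token="hf_...",  # placeholder; pyannote diarization models require HF auth
    device=device,
)

# The patched call site passes the converted WAV to the pipeline, which returns
# per-speaker segments that can later be matched against the transcribed chunks.
diarization = diarize_model("converted_audio.wav")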