Update app.py
Browse files
app.py
CHANGED
@@ -84,6 +84,31 @@ def process_audio(audio_file, max_duration=600): # limit to 5 minutes initially
|
|
84 |
st.error(f"Error processing audio: {str(e)}")
|
85 |
return None
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
def main():
|
88 |
st.title("Multi-Speaker Audio Analyzer")
|
89 |
st.write("Upload an audio file (MP3/WAV) up to 5 minutes long for best performance")
|
@@ -92,7 +117,7 @@ def main():
|
|
92 |
|
93 |
if uploaded_file:
|
94 |
# Display file info
|
95 |
-
file_size = len(uploaded_file.getvalue()) / (1024 * 1024)
|
96 |
st.write(f"File size: {file_size:.2f} MB")
|
97 |
|
98 |
# Display audio player
|
@@ -105,14 +130,44 @@ def main():
|
|
105 |
results = process_audio(uploaded_file)
|
106 |
|
107 |
if results:
|
108 |
-
# Display results in tabs
|
109 |
tab1, tab2, tab3 = st.tabs(["Speakers", "Transcription", "Summary"])
|
110 |
|
111 |
with tab1:
|
112 |
-
st.write("Speaker
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
|
|
116 |
with tab2:
|
117 |
st.write("Transcription:")
|
118 |
st.write(results["transcription"])
|
|
|
84 |
st.error(f"Error processing audio: {str(e)}")
|
85 |
return None
|
86 |
|
87 |
+
def format_speaker_segments(diarization_result, min_duration=0.3):
    """Build a cleaned-up speaker timeline from a diarization result.

    Turns shorter than ``min_duration`` are dropped, and consecutive kept
    turns attributed to the same speaker are merged into one segment.

    Args:
        diarization_result: Object exposing ``itertracks(yield_label=True)``
            that yields ``(turn, track, speaker)`` triples, where ``turn``
            has ``start`` and ``end`` attributes (presumably a
            pyannote ``Annotation`` with times in seconds; confirm against
            ``process_audio``).
        min_duration: Minimum turn length to keep, in the same unit as
            ``turn.start`` / ``turn.end``. Defaults to 0.3.

    Returns:
        A list of dicts with keys ``'speaker'``, ``'start'`` and ``'end'``,
        in the order the turns were yielded.
    """
    formatted_segments = []

    for turn, _, speaker in diarization_result.itertracks(yield_label=True):
        # Skip very short segments
        if turn.end - turn.start < min_duration:
            continue

        previous = formatted_segments[-1] if formatted_segments else None

        if previous is not None and previous['speaker'] == speaker:
            # Same speaker as the last kept segment: extend it. max() keeps
            # the end from moving backwards when this turn is nested inside
            # (or overlaps) the segment we already have.
            previous['end'] = max(previous['end'], turn.end)
        else:
            # First segment, or a change of speaker: start a new entry.
            formatted_segments.append({
                'speaker': speaker,
                'start': turn.start,
                'end': turn.end,
            })

    return formatted_segments
|
111 |
+
|
112 |
def main():
|
113 |
st.title("Multi-Speaker Audio Analyzer")
|
114 |
st.write("Upload an audio file (MP3/WAV) up to 5 minutes long for best performance")
|
|
|
117 |
|
118 |
if uploaded_file:
|
119 |
# Display file info
|
120 |
+
file_size = len(uploaded_file.getvalue()) / (1024 * 1024)
|
121 |
st.write(f"File size: {file_size:.2f} MB")
|
122 |
|
123 |
# Display audio player
|
|
|
130 |
results = process_audio(uploaded_file)
|
131 |
|
132 |
if results:
|
|
|
133 |
tab1, tab2, tab3 = st.tabs(["Speakers", "Transcription", "Summary"])
|
134 |
|
135 |
with tab1:
|
136 |
+
st.write("Speaker Timeline:")
|
137 |
+
|
138 |
+
# Process speaker segments
|
139 |
+
segments = format_speaker_segments(results["diarization"])
|
140 |
+
|
141 |
+
# Display segments in a more organized way
|
142 |
+
for segment in segments:
|
143 |
+
# Create columns for better layout
|
144 |
+
col1, col2, col3 = st.columns([2,1,6])
|
145 |
+
|
146 |
+
with col1:
|
147 |
+
# Show speaker with consistent color
|
148 |
+
speaker_num = int(segment['speaker'].split('_')[1])
|
149 |
+
colors = ['🔵', '🔴', '🟢', '🟡', '🟣'] # Different colors for different speakers
|
150 |
+
speaker_color = colors[speaker_num % len(colors)]
|
151 |
+
st.write(f"{speaker_color} {segment['speaker']}")
|
152 |
+
|
153 |
+
with col2:
|
154 |
+
# Format time more cleanly
|
155 |
+
start_time = f"{int(segment['start']):02d}:{(segment['start']%60):04.1f}"
|
156 |
+
end_time = f"{int(segment['end']):02d}:{(segment['end']%60):04.1f}"
|
157 |
+
st.write(f"{start_time} →")
|
158 |
+
|
159 |
+
with col3:
|
160 |
+
st.write(f"{end_time}")
|
161 |
+
|
162 |
+
# Add a small separator
|
163 |
+
st.markdown("---")
|
164 |
+
|
165 |
+
# Add legend
|
166 |
+
st.write("\nSpeaker Legend:")
|
167 |
+
for i in range(len(set(s['speaker'] for s in segments))):
|
168 |
+
st.write(f"{colors[i]} SPEAKER_{i:02d}")
|
169 |
|
170 |
+
# Keep original transcription and summary tabs
|
171 |
with tab2:
|
172 |
st.write("Transcription:")
|
173 |
st.write(results["transcription"])
|