Manyue-DataScientist committed on
Commit
e0d61c7
·
verified ·
1 Parent(s): caa4c85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -5
app.py CHANGED
@@ -84,6 +84,31 @@ def process_audio(audio_file, max_duration=600): # limit to 5 minutes initially
84
  st.error(f"Error processing audio: {str(e)}")
85
  return None
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def main():
88
  st.title("Multi-Speaker Audio Analyzer")
89
  st.write("Upload an audio file (MP3/WAV) up to 5 minutes long for best performance")
@@ -92,7 +117,7 @@ def main():
92
 
93
  if uploaded_file:
94
  # Display file info
95
- file_size = len(uploaded_file.getvalue()) / (1024 * 1024) # Convert to MB
96
  st.write(f"File size: {file_size:.2f} MB")
97
 
98
  # Display audio player
@@ -105,14 +130,44 @@ def main():
105
  results = process_audio(uploaded_file)
106
 
107
  if results:
108
- # Display results in tabs
109
  tab1, tab2, tab3 = st.tabs(["Speakers", "Transcription", "Summary"])
110
 
111
  with tab1:
112
- st.write("Speaker Segments:")
113
- for turn, _, speaker in results["diarization"].itertracks(yield_label=True):
114
- st.write(f"{speaker}: {turn.start:.1f}s → {turn.end:.1f}s")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
116
  with tab2:
117
  st.write("Transcription:")
118
  st.write(results["transcription"])
 
84
  st.error(f"Error processing audio: {str(e)}")
85
  return None
86
 
87
def format_speaker_segments(diarization_result, min_duration=0.3):
    """Collapse raw diarization tracks into a readable list of speaker turns.

    Tracks shorter than ``min_duration`` seconds are dropped. Each remaining
    track whose speaker matches the previously kept segment is merged into
    that segment, so consecutive entries in the result always belong to
    different speakers.

    Args:
        diarization_result: Object exposing ``itertracks(yield_label=True)``
            that yields ``(turn, track, speaker)`` triples, where ``turn``
            has ``start`` and ``end`` attributes (presumably a pyannote
            ``Annotation`` -- confirm against ``process_audio``).
        min_duration: Minimum track length in seconds; shorter tracks are
            ignored. Defaults to 0.3, the previously hard-coded threshold.

    Returns:
        A list of dicts with ``'speaker'``, ``'start'`` and ``'end'`` keys,
        in the order the tracks were yielded.
    """
    formatted_segments = []

    for turn, _, speaker in diarization_result.itertracks(yield_label=True):
        # Skip very short segments.
        if turn.end - turn.start < min_duration:
            continue

        previous = formatted_segments[-1] if formatted_segments else None
        if previous is not None and previous['speaker'] == speaker:
            # Same speaker keeps talking: extend the current segment. max()
            # guards against a track nested inside the previous one, which
            # would otherwise pull the merged end time backwards.
            previous['end'] = max(previous['end'], turn.end)
        else:
            # First segment, or the speaker changed: start a new entry.
            formatted_segments.append({
                'speaker': speaker,
                'start': turn.start,
                'end': turn.end,
            })

    return formatted_segments
111
+
112
  def main():
113
  st.title("Multi-Speaker Audio Analyzer")
114
  st.write("Upload an audio file (MP3/WAV) up to 5 minutes long for best performance")
 
117
 
118
  if uploaded_file:
119
  # Display file info
120
+ file_size = len(uploaded_file.getvalue()) / (1024 * 1024)
121
  st.write(f"File size: {file_size:.2f} MB")
122
 
123
  # Display audio player
 
130
  results = process_audio(uploaded_file)
131
 
132
  if results:
 
133
  tab1, tab2, tab3 = st.tabs(["Speakers", "Transcription", "Summary"])
134
 
135
  with tab1:
136
+ st.write("Speaker Timeline:")
137
+
138
+ # Process speaker segments
139
+ segments = format_speaker_segments(results["diarization"])
140
+
141
+ # Display segments in a more organized way
142
+ for segment in segments:
143
+ # Create columns for better layout
144
+ col1, col2, col3 = st.columns([2,1,6])
145
+
146
+ with col1:
147
+ # Show speaker with consistent color
148
+ speaker_num = int(segment['speaker'].split('_')[1])
149
+ colors = ['🔵', '🔴', '🟢', '🟡', '🟣'] # Different colors for different speakers
150
+ speaker_color = colors[speaker_num % len(colors)]
151
+ st.write(f"{speaker_color} {segment['speaker']}")
152
+
153
+ with col2:
154
+ # Format time more cleanly
155
+ start_time = f"{int(segment['start']):02d}:{(segment['start']%60):04.1f}"
156
+ end_time = f"{int(segment['end']):02d}:{(segment['end']%60):04.1f}"
157
+ st.write(f"{start_time} →")
158
+
159
+ with col3:
160
+ st.write(f"{end_time}")
161
+
162
+ # Add a small separator
163
+ st.markdown("---")
164
+
165
+ # Add legend
166
+ st.write("\nSpeaker Legend:")
167
+ for i in range(len(set(s['speaker'] for s in segments))):
168
+ st.write(f"{colors[i]} SPEAKER_{i:02d}")
169
 
170
+ # Keep original transcription and summary tabs
171
  with tab2:
172
  st.write("Transcription:")
173
  st.write(results["transcription"])