shukdevdatta123 committed on
Commit
ee377d8
·
verified ·
1 Parent(s): d1d87d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +463 -151
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import streamlit as st #
2
  import moviepy.editor as mp
3
  import speech_recognition as sr
4
  from pydub import AudioSegment
@@ -7,194 +7,506 @@ import os
7
  import io
8
  from transformers import pipeline
9
  import matplotlib.pyplot as plt
 
 
 
10
 
11
- # Function to convert video to audio
12
- def video_to_audio(video_file):
13
- # Load the video using moviepy
14
- video = mp.VideoFileClip(video_file)
15
-
16
- # Extract audio
17
- audio = video.audio
18
- temp_audio_path = tempfile.mktemp(suffix=".mp3")
19
-
20
- # Write the audio to a file
21
- audio.write_audiofile(temp_audio_path)
22
- return temp_audio_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # Function to convert MP3 audio to WAV
25
  def convert_mp3_to_wav(mp3_file):
26
- # Load the MP3 file using pydub
27
- audio = AudioSegment.from_mp3(mp3_file)
28
-
29
- # Create a temporary WAV file
30
- temp_wav_path = tempfile.mktemp(suffix=".wav")
31
-
32
- # Export the audio to the temporary WAV file
33
- audio.export(temp_wav_path, format="wav")
34
- return temp_wav_path
35
-
36
- # Function to transcribe audio to text
37
- def transcribe_audio(audio_file):
38
- # Initialize recognizer
39
- recognizer = sr.Recognizer()
40
-
41
- # Load the audio file using speech_recognition
42
- audio = sr.AudioFile(audio_file)
43
-
44
- with audio as source:
45
- audio_data = recognizer.record(source)
46
-
 
 
47
  try:
48
- # Transcribe the audio data to text using Google Web Speech API
49
- text = recognizer.recognize_google(audio_data)
50
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  except sr.UnknownValueError:
52
  return "Audio could not be understood."
53
- except sr.RequestError:
54
- return "Could not request results from Google Speech Recognition service."
 
 
55
 
56
  # Function to perform emotion detection using Hugging Face transformers
 
 
 
 
 
 
 
57
  def detect_emotion(text):
58
- # Load emotion detection pipeline
59
- emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- # Get the emotion predictions
62
- result = emotion_pipeline(text)
 
 
63
 
64
- # Extract the emotion with the highest score
65
- emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
66
- return emotions
 
 
 
 
 
 
67
 
68
  # Streamlit app layout
69
- st.title("Video and Audio to Text Transcription with Emotion Detection and Visualization")
70
- st.write("Upload a video or audio file to convert it to transcription, detect emotions, and visualize the audio waveform.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  # Create tabs to separate video and audio uploads
73
- tab = st.selectbox("Select the type of file to upload", ["Video", "Audio"])
74
 
75
- if tab == "Video":
76
- # File uploader for video
77
- uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])
 
 
 
 
 
 
78
 
79
  if uploaded_video is not None:
 
 
 
 
 
 
 
 
80
  # Save the uploaded video file temporarily
81
- with tempfile.NamedTemporaryFile(delete=False) as tmp_video:
82
  tmp_video.write(uploaded_video.read())
83
  tmp_video_path = tmp_video.name
84
 
85
  # Add an "Analyze Video" button
86
- if st.button("Analyze Video"):
87
- with st.spinner("Processing video... Please wait."):
88
-
89
- # Convert video to audio
90
- audio_file = video_to_audio(tmp_video_path)
91
-
92
- # Convert the extracted MP3 audio to WAV
93
- wav_audio_file = convert_mp3_to_wav(audio_file)
94
-
95
- # Transcribe audio to text
96
- transcription = transcribe_audio(wav_audio_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- # Show the transcription
99
- st.text_area("Transcription", transcription, height=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- # Emotion detection
102
- emotions = detect_emotion(transcription)
103
- st.write(f"Detected Emotions: {emotions}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # Store transcription and audio file in session state
106
- st.session_state.transcription = transcription
107
-
108
- # Store the audio file as a BytesIO object in memory
109
- with open(wav_audio_file, "rb") as f:
110
- audio_data = f.read()
111
- st.session_state.wav_audio_file = io.BytesIO(audio_data)
112
-
113
- # Cleanup temporary files
114
- os.remove(tmp_video_path)
115
- os.remove(audio_file)
116
-
117
- # Check if transcription and audio file are stored in session state
118
- if 'transcription' in st.session_state and 'wav_audio_file' in st.session_state:
119
- # Provide the audio file to the user for download
120
- st.audio(st.session_state.wav_audio_file, format='audio/wav')
121
-
122
- # Add download buttons for the transcription and audio
123
- # Downloadable transcription file
124
- st.download_button(
125
- label="Download Transcription",
126
- data=st.session_state.transcription,
127
- file_name="transcription.txt",
128
- mime="text/plain"
129
- )
130
-
131
- # Downloadable audio file
132
- st.download_button(
133
- label="Download Audio",
134
- data=st.session_state.wav_audio_file,
135
- file_name="converted_audio.wav",
136
- mime="audio/wav"
137
- )
138
-
139
- elif tab == "Audio":
140
  # File uploader for audio
141
- uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])
 
 
 
 
142
 
143
  if uploaded_audio is not None:
 
 
 
 
 
 
 
144
  # Save the uploaded audio file temporarily
145
  with tempfile.NamedTemporaryFile(delete=False) as tmp_audio:
146
  tmp_audio.write(uploaded_audio.read())
147
  tmp_audio_path = tmp_audio.name
148
 
149
  # Add an "Analyze Audio" button
150
- if st.button("Analyze Audio"):
151
- with st.spinner("Processing audio... Please wait."):
152
-
153
- # Convert audio to WAV if it's in MP3 format
154
- if uploaded_audio.type == "audio/mpeg":
155
- wav_audio_file = convert_mp3_to_wav(tmp_audio_path)
156
- else:
157
- wav_audio_file = tmp_audio_path
158
-
159
- # Transcribe audio to text
160
- transcription = transcribe_audio(wav_audio_file)
161
 
162
- # Show the transcription
163
- st.text_area("Transcription", transcription, height=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
- # Emotion detection
166
- emotions = detect_emotion(transcription)
167
- st.write(f"Detected Emotions: {emotions}")
 
 
 
 
 
168
 
169
- # Store transcription in session state
170
- st.session_state.transcription_audio = transcription
171
-
172
- # Store the audio file as a BytesIO object in memory
173
- with open(wav_audio_file, "rb") as f:
174
- audio_data = f.read()
175
- st.session_state.wav_audio_file_audio = io.BytesIO(audio_data)
176
-
177
- # Cleanup temporary audio file
178
- os.remove(tmp_audio_path)
179
-
180
- # Check if transcription and audio file are stored in session state
181
- if 'transcription_audio' in st.session_state and 'wav_audio_file_audio' in st.session_state:
182
- # Provide the audio file to the user for download
183
- st.audio(st.session_state.wav_audio_file_audio, format='audio/wav')
 
 
 
184
 
185
- # Add download buttons for the transcription and audio
186
- # Downloadable transcription file
187
- st.download_button(
188
- label="Download Transcription",
189
- data=st.session_state.transcription_audio,
190
- file_name="transcription_audio.txt",
191
- mime="text/plain"
192
- )
193
 
194
- # Downloadable audio file
195
- st.download_button(
196
- label="Download Audio",
197
- data=st.session_state.wav_audio_file_audio,
198
- file_name="converted_audio_audio.wav",
199
- mime="audio/wav"
200
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import moviepy.editor as mp
3
  import speech_recognition as sr
4
  from pydub import AudioSegment
 
7
  import io
8
  from transformers import pipeline
9
  import matplotlib.pyplot as plt
10
+ import gc
11
+ import warnings
12
warnings.filterwarnings("ignore")

# Browser-tab title, icon and wide layout for the page.
_page_settings = {
    "page_title": "Video/Audio Transcription with Emotion Detection",
    "page_icon": "🎬",
    "layout": "wide",
}
st.set_page_config(**_page_settings)
20
+
21
# Intended upload ceiling, expressed in megabytes. The original note on this
# function says the limit also has to be configured in the Streamlit config
# file or environment, so this value on its own only records the target.
@st.cache_data
def get_config():
    """Return the app's upload settings as a dict.

    The single key, ``maxUploadSize``, holds 1024, i.e. 1 GB expressed in MB.
    NOTE(review): nothing in the visible part of the file calls this
    function, so the value does not appear to be applied anywhere -- confirm.
    """
    upload_settings = {"maxUploadSize": 1024}
    return upload_settings
26
+
27
+ # Function to convert video to audio with progress tracking
28
def video_to_audio(video_file, progress_callback=None):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_file: Path of the video file to read.
        progress_callback: Optional callable invoked with an integer
            percentage: 50 just before encoding starts and 100 once the
            MP3 has been written.

    Returns:
        Path of the MP3 file that was written, or None when the conversion
        failed (the error is reported through ``st.error``). The caller owns
        the returned file and is responsible for deleting it.
    """
    video = None
    audio = None
    temp_audio_path = None
    try:
        video = mp.VideoFileClip(video_file)

        audio = video.audio
        if audio is None:
            # A clip without sound has no audio attribute to write out.
            raise ValueError("The video does not contain an audio track.")

        # mkstemp creates the file atomically; mktemp only picks a name that
        # another process could claim before we write to it.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)

        if progress_callback:
            progress_callback(50)  # 50% progress

        audio.write_audiofile(temp_audio_path, verbose=False, logger=None)

        if progress_callback:
            progress_callback(100)  # 100% progress

        return temp_audio_path
    except Exception as e:
        # Do not leave a partially written MP3 behind.
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        st.error(f"Error converting video to audio: {str(e)}")
        return None
    finally:
        # Release the underlying readers on every path, success or failure.
        if audio is not None:
            audio.close()
        if video is not None:
            video.close()
        del video, audio
        gc.collect()
57
 
58
  # Function to convert MP3 audio to WAV
59
  def convert_mp3_to_wav(mp3_file):
60
+ """Convert MP3 to WAV with memory optimization"""
61
+ try:
62
+ # Load the MP3 file using pydub
63
+ audio = AudioSegment.from_mp3(mp3_file)
64
+
65
+ # Create a temporary WAV file
66
+ temp_wav_path = tempfile.mktemp(suffix=".wav")
67
+
68
+ # Export the audio to the temporary WAV file
69
+ audio.export(temp_wav_path, format="wav")
70
+
71
+ # Clean up to free memory
72
+ del audio
73
+ gc.collect()
74
+
75
+ return temp_wav_path
76
+ except Exception as e:
77
+ st.error(f"Error converting MP3 to WAV: {str(e)}")
78
+ return None
79
+
80
+ # Function to transcribe audio to text with chunking for large files
81
def transcribe_audio(audio_file, chunk_duration=60):
    """Transcribe a WAV file to text with ``recognize_google``.

    Audio longer than ``chunk_duration`` seconds is cut into consecutive
    pieces that are recognised one after another; shorter audio is sent in
    a single request.

    Args:
        audio_file: Path of the WAV file to transcribe.
        chunk_duration: Maximum length of one piece, in seconds.

    Returns:
        The transcription. Pieces that could not be recognised appear as
        ``[Chunk N: Audio could not be transcribed]`` placeholders. When the
        whole operation fails, a human-readable error message is returned
        instead of raising.
    """
    try:
        recognizer = sr.Recognizer()

        audio_segment = AudioSegment.from_wav(audio_file)
        total_ms = len(audio_segment)  # pydub lengths are in milliseconds
        chunk_ms = int(chunk_duration * 1000)
        if chunk_ms <= 0:
            raise ValueError("chunk_duration must be positive")

        transcriptions = []

        if total_ms > chunk_ms:
            # Stepping through the start offsets never yields an empty
            # trailing chunk, even when the length is an exact multiple.
            chunk_starts = range(0, total_ms, chunk_ms)
            num_chunks = len(chunk_starts)

            # One bar, updated in place, instead of a new bar per chunk.
            progress_bar = st.progress(0.0, text="Transcribing... 0%")

            for i, start_ms in enumerate(chunk_starts):
                end_ms = min(start_ms + chunk_ms, total_ms)
                chunk = audio_segment[start_ms:end_ms]

                fd, chunk_path = tempfile.mkstemp(suffix=".wav")
                os.close(fd)
                try:
                    chunk.export(chunk_path, format="wav")
                    with sr.AudioFile(chunk_path) as source:
                        audio_data = recognizer.record(source)
                    text = recognizer.recognize_google(audio_data)
                    transcriptions.append(text)
                except (sr.UnknownValueError, sr.RequestError):
                    transcriptions.append(f"[Chunk {i+1}: Audio could not be transcribed]")
                finally:
                    # Remove the chunk file whatever happened above.
                    os.remove(chunk_path)

                progress = int(((i + 1) / num_chunks) * 100)
                progress_bar.progress(progress / 100, text=f"Transcribing... {progress}%")
        else:
            # Short audio: transcribe the file directly.
            with sr.AudioFile(audio_file) as source:
                audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
            transcriptions.append(text)

        full_transcription = " ".join(transcriptions)

        # Drop the decoded samples before returning to keep memory low.
        del audio_segment
        gc.collect()

        return full_transcription

    except sr.UnknownValueError:
        return "Audio could not be understood."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service: {str(e)}"
    except Exception as e:
        return f"Error during transcription: {str(e)}"
146
 
147
  # Function to perform emotion detection using Hugging Face transformers
148
@st.cache_resource
def load_emotion_model():
    """Build the emotion text-classification pipeline (cached by the decorator)."""
    classifier = pipeline(
        "text-classification",
        model="j-hartmann/emotion-english-distilroberta-base",
        return_all_scores=True,
    )
    return classifier
154
+
155
  def detect_emotion(text):
156
+ """Detect emotions in text"""
157
+ try:
158
+ emotion_pipeline = load_emotion_model()
159
+
160
+ # Split text into chunks if it's too long (model has token limits)
161
+ max_length = 500
162
+ if len(text) > max_length:
163
+ chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)]
164
+ all_emotions = {}
165
+
166
+ for chunk in chunks:
167
+ result = emotion_pipeline(chunk)
168
+ chunk_emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
169
+
170
+ # Aggregate emotions
171
+ for emotion, score in chunk_emotions.items():
172
+ if emotion in all_emotions:
173
+ all_emotions[emotion] = (all_emotions[emotion] + score) / 2
174
+ else:
175
+ all_emotions[emotion] = score
176
+
177
+ return all_emotions
178
+ else:
179
+ result = emotion_pipeline(text)
180
+ emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
181
+ return emotions
182
+
183
+ except Exception as e:
184
+ st.error(f"Error in emotion detection: {str(e)}")
185
+ return {"error": "Could not analyze emotions"}
186
+
187
+ # Function to visualize emotions
188
def plot_emotions(emotions):
    """Create a bar chart of emotion scores, highest score first.

    Args:
        emotions: Dict mapping emotion labels to scores, as returned by
            ``detect_emotion``.

    Returns:
        The matplotlib figure, or None when ``emotions`` is empty or is the
        ``{"error": ...}`` marker. The caller should close the figure once
        it has been rendered.
    """
    if not emotions or "error" in emotions:
        return None

    ranked = sorted(emotions.items(), key=lambda item: item[1], reverse=True)
    labels = [label for label, _ in ranked]
    scores = [score for _, score in ranked]

    fig, ax = plt.subplots(figsize=(10, 6))

    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8']
    bars = ax.bar(labels, scores, color=colors[:len(labels)])

    ax.set_xlabel('Emotions')
    ax.set_ylabel('Confidence Score')
    ax.set_title('Emotion Detection Results')
    ax.set_ylim(0, 1)

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                f'{height:.3f}', ha='center', va='bottom')

    # Work on this figure's own axes rather than pyplot's implicit
    # "current figure", which is shared global state.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    return fig
214
 
215
  # Streamlit app layout
216
st.title("🎬 Video and Audio Transcription with Emotion Detection")
st.write("Upload video files up to 1GB or audio files for transcription and emotion analysis.")

# Display file size information
st.info("📁 **File Size Limits**: Video files up to 1GB, Audio files up to 500MB")

# Add instructions for large file uploads
with st.expander("📋 Instructions for Large Files"):
    # Joined from flush-left lines so the Markdown lists cannot be mistaken
    # for an indented code block.
    st.write("\n".join((
        "**For optimal performance with large files:**",
        "1. Ensure stable internet connection",
        "2. Be patient - large files take time to process",
        "3. Don't close the browser tab during processing",
        "4. For very large files, consider splitting them beforehand",
        "",
        "**Supported formats:**",
        "- **Video**: MP4, MOV, AVI",
        "- **Audio**: WAV, MP3",
    )))

# Create tabs to separate video and audio uploads
tab1, tab2 = st.tabs(["📹 Video Upload", "🎵 Audio Upload"])
238
 
239
with tab1:
    st.header("Video File Processing")

    # File uploader for video
    uploaded_video = st.file_uploader(
        "Upload Video File",
        type=["mp4", "mov", "avi"],
        help="Maximum file size: 1GB"
    )

    if uploaded_video is not None:
        # Display file information
        file_size_mb = uploaded_video.size / (1024 * 1024)
        st.info(f"📊 **File Info**: {uploaded_video.name} ({file_size_mb:.1f} MB)")

        # Only show a preview for files under 100MB
        if file_size_mb < 100:
            st.video(uploaded_video)

        if st.button("🔄 Analyze Video", type="primary"):
            progress_bar = st.progress(0)
            status_text = st.empty()

            # Every temp path starts as None so the cleanup below can tell
            # which files were actually created.
            tmp_video_path = None
            audio_file = None
            wav_audio_file = None

            try:
                with st.spinner("Processing video... This may take several minutes for large files."):

                    # Written only once an analysis is requested, so plain
                    # reruns of the script do not pile up temp copies.
                    video_suffix = os.path.splitext(uploaded_video.name)[1] or ".mp4"
                    with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp_video:
                        tmp_video_path = tmp_video.name
                        # getvalue() is independent of the current read
                        # position of the upload buffer.
                        tmp_video.write(uploaded_video.getvalue())

                    status_text.text("Step 1/4: Converting video to audio...")
                    progress_bar.progress(10)

                    # st.progress accepts ints in 0-100; a float must lie in
                    # 0.0-1.0, so the scaled percentage is cast to int.
                    audio_file = video_to_audio(
                        tmp_video_path,
                        lambda p: progress_bar.progress(int(10 + p * 0.3)),
                    )
                    if audio_file is None:
                        raise RuntimeError("Failed to extract audio from video.")

                    status_text.text("Step 2/4: Converting audio format...")
                    progress_bar.progress(50)

                    wav_audio_file = convert_mp3_to_wav(audio_file)
                    if wav_audio_file is None:
                        raise RuntimeError("Failed to convert audio format.")

                    status_text.text("Step 3/4: Transcribing audio to text...")
                    progress_bar.progress(60)

                    transcription = transcribe_audio(wav_audio_file)

                    status_text.text("Step 4/4: Analyzing emotions...")
                    progress_bar.progress(90)

                    emotions = detect_emotion(transcription)

                    # Keep the audio in memory; the temp file is deleted below.
                    with open(wav_audio_file, "rb") as f:
                        audio_data = f.read()

                    # Store results in session state for the download section.
                    st.session_state.video_transcription = transcription
                    st.session_state.video_emotions = emotions
                    st.session_state.video_wav_audio_file = io.BytesIO(audio_data)

                    progress_bar.progress(100)
                    status_text.text("✅ Processing complete!")

                    st.success("Analysis completed successfully!")

                    st.subheader("📝 Transcription")
                    # The widget key must differ from the session-state key
                    # written above: Streamlit forbids assigning to a key
                    # owned by an instantiated widget.
                    st.text_area(
                        "Transcription",
                        transcription,
                        height=300,
                        key="video_transcription_text",
                        label_visibility="collapsed",
                    )

                    st.subheader("😊 Emotion Analysis")
                    col1, col2 = st.columns([1, 1])

                    with col1:
                        st.write("**Detected Emotions:**")
                        if "error" in emotions:
                            # The error marker holds a message, not a score.
                            st.write(emotions["error"])
                        else:
                            for emotion, score in emotions.items():
                                st.write(f"- **{emotion.title()}**: {score:.3f}")

                    with col2:
                        fig = plot_emotions(emotions)
                        if fig:
                            st.pyplot(fig)
                            plt.close(fig)  # release the figure after rendering

            except Exception as e:
                st.error(f"An error occurred during processing: {str(e)}")
            finally:
                # Remove whichever temp files exist, on success and failure.
                for temp_path in (tmp_video_path, audio_file, wav_audio_file):
                    if temp_path and os.path.exists(temp_path):
                        os.remove(temp_path)

        # Offer playback and downloads once results are in session state
        if 'video_transcription' in st.session_state and 'video_wav_audio_file' in st.session_state:
            st.subheader("📥 Download Results")

            col1, col2, col3 = st.columns(3)

            with col1:
                # Provide the audio file to the user for playback
                st.audio(st.session_state.video_wav_audio_file, format='audio/wav')

            with col2:
                # Downloadable transcription file
                st.download_button(
                    label="📄 Download Transcription",
                    data=st.session_state.video_transcription,
                    file_name="video_transcription.txt",
                    mime="text/plain"
                )

            with col3:
                # Downloadable audio file
                st.download_button(
                    label="🎵 Download Audio",
                    data=st.session_state.video_wav_audio_file,
                    file_name="extracted_audio.wav",
                    mime="audio/wav"
                )
381
 
382
with tab2:
    st.header("Audio File Processing")

    # File uploader for audio
    uploaded_audio = st.file_uploader(
        "Upload Audio File",
        type=["wav", "mp3"],
        help="Maximum file size: 500MB"
    )

    if uploaded_audio is not None:
        # Display file information
        file_size_mb = uploaded_audio.size / (1024 * 1024)
        st.info(f"📊 **File Info**: {uploaded_audio.name} ({file_size_mb:.1f} MB)")

        # Show audio player
        st.audio(uploaded_audio)

        if st.button("🔄 Analyze Audio", type="primary"):
            progress_bar = st.progress(0)
            status_text = st.empty()

            # Both temp paths start as None so the cleanup below can tell
            # which files were actually created.
            tmp_audio_path = None
            wav_audio_file = None

            try:
                with st.spinner("Processing audio... Please wait."):

                    # Written only once an analysis is requested, so plain
                    # reruns of the script do not pile up temp copies.
                    audio_suffix = os.path.splitext(uploaded_audio.name)[1].lower()
                    with tempfile.NamedTemporaryFile(delete=False, suffix=audio_suffix) as tmp_audio:
                        tmp_audio_path = tmp_audio.name
                        # getvalue() is independent of the current read
                        # position of the upload buffer.
                        tmp_audio.write(uploaded_audio.getvalue())

                    status_text.text("Step 1/3: Converting audio format...")
                    progress_bar.progress(20)

                    # Convert to WAV when the upload is an MP3; the extension
                    # is checked too in case the reported MIME type differs.
                    is_mp3 = uploaded_audio.type == "audio/mpeg" or audio_suffix == ".mp3"
                    if is_mp3:
                        wav_audio_file = convert_mp3_to_wav(tmp_audio_path)
                    else:
                        wav_audio_file = tmp_audio_path

                    if wav_audio_file is None:
                        raise RuntimeError("Failed to process audio file.")

                    status_text.text("Step 2/3: Transcribing audio to text...")
                    progress_bar.progress(40)

                    transcription = transcribe_audio(wav_audio_file)

                    status_text.text("Step 3/3: Analyzing emotions...")
                    progress_bar.progress(80)

                    emotions = detect_emotion(transcription)

                    # Keep the audio in memory; the temp file is deleted below.
                    with open(wav_audio_file, "rb") as f:
                        audio_data = f.read()

                    # Store results in session state for the download section.
                    st.session_state.audio_transcription = transcription
                    st.session_state.audio_emotions = emotions
                    st.session_state.audio_wav_audio_file = io.BytesIO(audio_data)

                    progress_bar.progress(100)
                    status_text.text("✅ Processing complete!")

                    st.success("Analysis completed successfully!")

                    st.subheader("📝 Transcription")
                    # The widget key must differ from the session-state key
                    # written above: Streamlit forbids assigning to a key
                    # owned by an instantiated widget.
                    st.text_area(
                        "Transcription",
                        transcription,
                        height=300,
                        key="audio_transcription_text",
                        label_visibility="collapsed",
                    )

                    st.subheader("😊 Emotion Analysis")
                    col1, col2 = st.columns([1, 1])

                    with col1:
                        st.write("**Detected Emotions:**")
                        if "error" in emotions:
                            # The error marker holds a message, not a score.
                            st.write(emotions["error"])
                        else:
                            for emotion, score in emotions.items():
                                st.write(f"- **{emotion.title()}**: {score:.3f}")

                    with col2:
                        fig = plot_emotions(emotions)
                        if fig:
                            st.pyplot(fig)
                            plt.close(fig)  # release the figure after rendering

            except Exception as e:
                st.error(f"An error occurred during processing: {str(e)}")
            finally:
                # Remove whichever temp files exist, on success and failure.
                # The existence check also covers the WAV case, where both
                # names point at the same file.
                for temp_path in (tmp_audio_path, wav_audio_file):
                    if temp_path and os.path.exists(temp_path):
                        os.remove(temp_path)

        # Offer downloads once results are in session state
        if 'audio_transcription' in st.session_state and 'audio_wav_audio_file' in st.session_state:
            st.subheader("📥 Download Results")

            col1, col2 = st.columns(2)

            with col1:
                # Downloadable transcription file
                st.download_button(
                    label="📄 Download Transcription",
                    data=st.session_state.audio_transcription,
                    file_name="audio_transcription.txt",
                    mime="text/plain"
                )

            with col2:
                # Downloadable audio file
                st.download_button(
                    label="🎵 Download Processed Audio",
                    data=st.session_state.audio_wav_audio_file,
                    file_name="processed_audio.wav",
                    mime="audio/wav"
                )
509
+
510
# Footer
st.markdown("---")
st.markdown("Built with ❤️ using Streamlit, MoviePy, and HuggingFace Transformers")