shukdevdatta123 committed
Commit 972a238 · verified · 1 Parent(s): ee377d8

Update app.py

Files changed (1)
  1. app.py +116 -488
app.py CHANGED
@@ -7,506 +7,134 @@ import os
 import io
 from transformers import pipeline
 import matplotlib.pyplot as plt
-import gc
-import warnings
-warnings.filterwarnings("ignore")
-
-# Configure Streamlit for large file uploads
-st.set_page_config(
-    page_title="Video/Audio Transcription with Emotion Detection",
-    page_icon="🎬",
-    layout="wide"
-)
-
-# Set maximum upload size (this needs to be set before any file upload widgets)
-# Note: You'll also need to configure this in your Streamlit config file or environment
-@st.cache_data
-def get_config():
-    return {"maxUploadSize": 1024}  # 1GB in MB
-
-# Function to convert video to audio with progress tracking
-def video_to_audio(video_file, progress_callback=None):
-    """Convert video to audio with memory optimization"""
-    try:
-        # Load the video using moviepy with memory optimization
-        video = mp.VideoFileClip(video_file)
-
-        # Extract audio
-        audio = video.audio
-        temp_audio_path = tempfile.mktemp(suffix=".mp3")
-
-        # Write the audio to a file with progress tracking
-        if progress_callback:
-            progress_callback(50)  # 50% progress
-
-        audio.write_audiofile(temp_audio_path, verbose=False, logger=None)
-
-        # Clean up video object to free memory
-        audio.close()
-        video.close()
-        del video, audio
-        gc.collect()
-
-        if progress_callback:
-            progress_callback(100)  # 100% progress
-
-        return temp_audio_path
-    except Exception as e:
-        st.error(f"Error converting video to audio: {str(e)}")
-        return None
-
-# Function to convert MP3 audio to WAV
 def convert_mp3_to_wav(mp3_file):
-    """Convert MP3 to WAV with memory optimization"""
-    try:
-        # Load the MP3 file using pydub
-        audio = AudioSegment.from_mp3(mp3_file)
-
-        # Create a temporary WAV file
-        temp_wav_path = tempfile.mktemp(suffix=".wav")
-
-        # Export the audio to the temporary WAV file
-        audio.export(temp_wav_path, format="wav")
-
-        # Clean up to free memory
-        del audio
-        gc.collect()
-
-        return temp_wav_path
-    except Exception as e:
-        st.error(f"Error converting MP3 to WAV: {str(e)}")
-        return None
-
-# Function to transcribe audio to text with chunking for large files
-def transcribe_audio(audio_file, chunk_duration=60):
-    """Transcribe audio to text with chunking for large files"""
-    try:
-        # Initialize recognizer
-        recognizer = sr.Recognizer()
-
-        # Load audio and get duration
-        audio_segment = AudioSegment.from_wav(audio_file)
-        duration = len(audio_segment) / 1000  # Duration in seconds
-
         transcriptions = []
-
-        # If audio is longer than chunk_duration, split it
-        if duration > chunk_duration:
-            num_chunks = int(duration / chunk_duration) + 1
-
-            for i in range(num_chunks):
-                start_time = i * chunk_duration * 1000  # Convert to milliseconds
-                end_time = min((i + 1) * chunk_duration * 1000, len(audio_segment))
-
-                # Extract chunk
-                chunk = audio_segment[start_time:end_time]
-
-                # Save chunk temporarily
-                chunk_path = tempfile.mktemp(suffix=".wav")
-                chunk.export(chunk_path, format="wav")
-
-                # Transcribe chunk
-                try:
-                    with sr.AudioFile(chunk_path) as source:
-                        audio_data = recognizer.record(source)
-                        text = recognizer.recognize_google(audio_data)
-                        transcriptions.append(text)
-                except (sr.UnknownValueError, sr.RequestError):
-                    transcriptions.append(f"[Chunk {i+1}: Audio could not be transcribed]")
-
-                # Clean up chunk file
-                os.remove(chunk_path)
-
-                # Update progress
-                progress = int(((i + 1) / num_chunks) * 100)
-                st.progress(progress / 100, text=f"Transcribing... {progress}%")
-
-        else:
-            # For shorter audio, transcribe directly
-            with sr.AudioFile(audio_file) as source:
-                audio_data = recognizer.record(source)
                text = recognizer.recognize_google(audio_data)
                transcriptions.append(text)
-
-        # Join all transcriptions
-        full_transcription = " ".join(transcriptions)
-
-        # Clean up
-        del audio_segment
-        gc.collect()
-
-        return full_transcription
-
-    except sr.UnknownValueError:
-        return "Audio could not be understood."
-    except sr.RequestError as e:
-        return f"Could not request results from Google Speech Recognition service: {str(e)}"
-    except Exception as e:
-        return f"Error during transcription: {str(e)}"
-
-# Function to perform emotion detection using Hugging Face transformers
-@st.cache_resource
-def load_emotion_model():
-    """Load emotion detection model (cached)"""
-    return pipeline("text-classification",
-                    model="j-hartmann/emotion-english-distilroberta-base",
-                    return_all_scores=True)
 
 def detect_emotion(text):
-    """Detect emotions in text"""
-    try:
-        emotion_pipeline = load_emotion_model()
-
-        # Split text into chunks if it's too long (model has token limits)
-        max_length = 500
-        if len(text) > max_length:
-            chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)]
-            all_emotions = {}
-
-            for chunk in chunks:
-                result = emotion_pipeline(chunk)
-                chunk_emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
-
-                # Aggregate emotions
-                for emotion, score in chunk_emotions.items():
-                    if emotion in all_emotions:
-                        all_emotions[emotion] = (all_emotions[emotion] + score) / 2
-                    else:
-                        all_emotions[emotion] = score
-
-            return all_emotions
-        else:
-            result = emotion_pipeline(text)
-            emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
-            return emotions
-
-    except Exception as e:
-        st.error(f"Error in emotion detection: {str(e)}")
-        return {"error": "Could not analyze emotions"}
-
-# Function to visualize emotions
-def plot_emotions(emotions):
-    """Create a bar chart of emotions"""
-    if "error" in emotions:
-        return None
-
-    fig, ax = plt.subplots(figsize=(10, 6))
-    emotions_sorted = dict(sorted(emotions.items(), key=lambda x: x[1], reverse=True))
-
-    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8']
-    bars = ax.bar(emotions_sorted.keys(), emotions_sorted.values(),
-                  color=colors[:len(emotions_sorted)])
-
-    ax.set_xlabel('Emotions')
-    ax.set_ylabel('Confidence Score')
-    ax.set_title('Emotion Detection Results')
-    ax.set_ylim(0, 1)
-
-    # Add value labels on bars
-    for bar in bars:
-        height = bar.get_height()
-        ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
-                f'{height:.3f}', ha='center', va='bottom')
-
-    plt.xticks(rotation=45)
-    plt.tight_layout()
-    return fig
 
 # Streamlit app layout
-st.title("🎬 Video and Audio Transcription with Emotion Detection")
-st.write("Upload video files up to 1GB or audio files for transcription and emotion analysis.")
-
-# Display file size information
-st.info("📁 **File Size Limits**: Video files up to 1GB, Audio files up to 500MB")
-
-# Add instructions for large file uploads
-with st.expander("📋 Instructions for Large Files"):
-    st.write("""
-    **For optimal performance with large files:**
-    1. Ensure stable internet connection
-    2. Be patient - large files take time to process
-    3. Don't close the browser tab during processing
-    4. For very large files, consider splitting them beforehand
-
-    **Supported formats:**
-    - **Video**: MP4, MOV, AVI
-    - **Audio**: WAV, MP3
-    """)
-
-# Create tabs to separate video and audio uploads
-tab1, tab2 = st.tabs(["📹 Video Upload", "🎵 Audio Upload"])
 
-with tab1:
-    st.header("Video File Processing")
-
-    # File uploader for video with increased size limit
-    uploaded_video = st.file_uploader(
-        "Upload Video File",
-        type=["mp4", "mov", "avi"],
-        help="Maximum file size: 1GB"
-    )
 
-    if uploaded_video is not None:
-        # Display file information
-        file_size_mb = uploaded_video.size / (1024 * 1024)
-        st.info(f"📊 **File Info**: {uploaded_video.name} ({file_size_mb:.1f} MB)")
-
-        # Show video preview for smaller files
-        if file_size_mb < 100:  # Only show preview for files under 100MB
-            st.video(uploaded_video)
-
-        # Save the uploaded video file temporarily
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_video:
             tmp_video.write(uploaded_video.read())
             tmp_video_path = tmp_video.name
-
-        # Add an "Analyze Video" button
-        if st.button("🔄 Analyze Video", type="primary"):
-            progress_bar = st.progress(0)
-            status_text = st.empty()
-
-            try:
-                with st.spinner("Processing video... This may take several minutes for large files."):
-
-                    status_text.text("Step 1/4: Converting video to audio...")
-                    progress_bar.progress(10)
-
-                    # Convert video to audio
-                    audio_file = video_to_audio(tmp_video_path,
-                                                lambda p: progress_bar.progress(10 + p * 0.3))
-
-                    if audio_file is None:
-                        st.error("Failed to extract audio from video.")
-                        st.stop()
-
-                    status_text.text("Step 2/4: Converting audio format...")
-                    progress_bar.progress(50)
-
-                    # Convert the extracted MP3 audio to WAV
-                    wav_audio_file = convert_mp3_to_wav(audio_file)
-
-                    if wav_audio_file is None:
-                        st.error("Failed to convert audio format.")
-                        st.stop()
-
-                    status_text.text("Step 3/4: Transcribing audio to text...")
-                    progress_bar.progress(60)
-
-                    # Transcribe audio to text
-                    transcription = transcribe_audio(wav_audio_file)
-
-                    status_text.text("Step 4/4: Analyzing emotions...")
-                    progress_bar.progress(90)
-
-                    # Emotion detection
-                    emotions = detect_emotion(transcription)
-
-                    progress_bar.progress(100)
-                    status_text.text("✅ Processing complete!")
-
-                    # Display results
-                    st.success("Analysis completed successfully!")
-
-                    # Show the transcription
-                    st.subheader("📝 Transcription")
-                    st.text_area("", transcription, height=300, key="video_transcription")
-
-                    # Show emotions
-                    st.subheader("😊 Emotion Analysis")
-                    col1, col2 = st.columns([1, 1])
-
-                    with col1:
-                        st.write("**Detected Emotions:**")
-                        for emotion, score in emotions.items():
-                            st.write(f"- **{emotion.title()}**: {score:.3f}")
-
-                    with col2:
-                        fig = plot_emotions(emotions)
-                        if fig:
-                            st.pyplot(fig)
-
-                    # Store results in session state
-                    st.session_state.video_transcription = transcription
-                    st.session_state.video_emotions = emotions
-
-                    # Store the audio file as a BytesIO object in memory
-                    with open(wav_audio_file, "rb") as f:
-                        audio_data = f.read()
-                        st.session_state.video_wav_audio_file = io.BytesIO(audio_data)
-
-                    # Cleanup temporary files
-                    os.remove(tmp_video_path)
-                    os.remove(audio_file)
-                    os.remove(wav_audio_file)
-
-            except Exception as e:
-                st.error(f"An error occurred during processing: {str(e)}")
-                # Clean up files in case of error
-                try:
-                    os.remove(tmp_video_path)
-                    if 'audio_file' in locals() and audio_file:
-                        os.remove(audio_file)
-                    if 'wav_audio_file' in locals() and wav_audio_file:
-                        os.remove(wav_audio_file)
-                except:
-                    pass
-
-    # Check if results are stored in session state
-    if 'video_transcription' in st.session_state and 'video_wav_audio_file' in st.session_state:
-        st.subheader("📥 Download Results")
-
-        col1, col2, col3 = st.columns(3)
-
-        with col1:
-            # Provide the audio file to the user for playback
-            st.audio(st.session_state.video_wav_audio_file, format='audio/wav')
-
-        with col2:
-            # Downloadable transcription file
-            st.download_button(
-                label="📄 Download Transcription",
-                data=st.session_state.video_transcription,
-                file_name="video_transcription.txt",
-                mime="text/plain"
-            )
-
-        with col3:
-            # Downloadable audio file
-            st.download_button(
-                label="🎵 Download Audio",
-                data=st.session_state.video_wav_audio_file,
-                file_name="extracted_audio.wav",
-                mime="audio/wav"
-            )
-
-with tab2:
-    st.header("Audio File Processing")
-
-    # File uploader for audio
-    uploaded_audio = st.file_uploader(
-        "Upload Audio File",
-        type=["wav", "mp3"],
-        help="Maximum file size: 500MB"
-    )
-
-    if uploaded_audio is not None:
-        # Display file information
-        file_size_mb = uploaded_audio.size / (1024 * 1024)
-        st.info(f"📊 **File Info**: {uploaded_audio.name} ({file_size_mb:.1f} MB)")
-
-        # Show audio player
-        st.audio(uploaded_audio)
-
-        # Save the uploaded audio file temporarily
         with tempfile.NamedTemporaryFile(delete=False) as tmp_audio:
             tmp_audio.write(uploaded_audio.read())
             tmp_audio_path = tmp_audio.name
-
-        # Add an "Analyze Audio" button
-        if st.button("🔄 Analyze Audio", type="primary"):
-            progress_bar = st.progress(0)
-            status_text = st.empty()
-
-            try:
-                with st.spinner("Processing audio... Please wait."):
-
-                    status_text.text("Step 1/3: Converting audio format...")
-                    progress_bar.progress(20)
-
-                    # Convert audio to WAV if it's in MP3 format
-                    if uploaded_audio.type == "audio/mpeg":
-                        wav_audio_file = convert_mp3_to_wav(tmp_audio_path)
-                    else:
-                        wav_audio_file = tmp_audio_path
-
-                    if wav_audio_file is None:
-                        st.error("Failed to process audio file.")
-                        st.stop()
-
-                    status_text.text("Step 2/3: Transcribing audio to text...")
-                    progress_bar.progress(40)
-
-                    # Transcribe audio to text
-                    transcription = transcribe_audio(wav_audio_file)
-
-                    status_text.text("Step 3/3: Analyzing emotions...")
-                    progress_bar.progress(80)
-
-                    # Emotion detection
-                    emotions = detect_emotion(transcription)
-
-                    progress_bar.progress(100)
-                    status_text.text("✅ Processing complete!")
-
-                    # Display results
-                    st.success("Analysis completed successfully!")
-
-                    # Show the transcription
-                    st.subheader("📝 Transcription")
-                    st.text_area("", transcription, height=300, key="audio_transcription")
-
-                    # Show emotions
-                    st.subheader("😊 Emotion Analysis")
-                    col1, col2 = st.columns([1, 1])
-
-                    with col1:
-                        st.write("**Detected Emotions:**")
-                        for emotion, score in emotions.items():
-                            st.write(f"- **{emotion.title()}**: {score:.3f}")
-
-                    with col2:
-                        fig = plot_emotions(emotions)
-                        if fig:
-                            st.pyplot(fig)
-
-                    # Store results in session state
-                    st.session_state.audio_transcription = transcription
-                    st.session_state.audio_emotions = emotions
-
-                    # Store the audio file as a BytesIO object in memory
-                    with open(wav_audio_file, "rb") as f:
-                        audio_data = f.read()
-                        st.session_state.audio_wav_audio_file = io.BytesIO(audio_data)
-
-                    # Cleanup temporary audio file
-                    os.remove(tmp_audio_path)
-                    if wav_audio_file != tmp_audio_path:
-                        os.remove(wav_audio_file)
-
-            except Exception as e:
-                st.error(f"An error occurred during processing: {str(e)}")
-                # Clean up files in case of error
-                try:
-                    os.remove(tmp_audio_path)
-                    if 'wav_audio_file' in locals() and wav_audio_file and wav_audio_file != tmp_audio_path:
-                        os.remove(wav_audio_file)
-                except:
-                    pass
-
-    # Check if results are stored in session state
-    if 'audio_transcription' in st.session_state and 'audio_wav_audio_file' in st.session_state:
-        st.subheader("📥 Download Results")
-
-        col1, col2 = st.columns(2)
-
-        with col1:
-            # Downloadable transcription file
-            st.download_button(
-                label="📄 Download Transcription",
-                data=st.session_state.audio_transcription,
-                file_name="audio_transcription.txt",
-                mime="text/plain"
-            )
-
-        with col2:
-            # Downloadable audio file
-            st.download_button(
-                label="🎵 Download Processed Audio",
-                data=st.session_state.audio_wav_audio_file,
-                file_name="processed_audio.wav",
-                mime="audio/wav"
-            )
-
-# Footer
-st.markdown("---")
-st.markdown("Built with ❤️ using Streamlit, MoviePy, and HuggingFace Transformers")
 
 import io
 from transformers import pipeline
 import matplotlib.pyplot as plt
+import librosa
+import numpy as np
+
+# Function to convert video to audio
+def video_to_audio(video_file):
+    video = mp.VideoFileClip(video_file)
+    audio = video.audio
+    temp_audio_path = tempfile.mktemp(suffix=".mp3")
+    audio.write_audiofile(temp_audio_path)
+    return temp_audio_path
+
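Note: the rewritten `video_to_audio` drops the previous revision's explicit cleanup (`audio.close()`, `video.close()`, `gc.collect()`) and keeps `tempfile.mktemp`, which is deprecated and race-prone. A minimal sketch of the same helper with both points addressed, assuming the app's `mp` alias is `moviepy.editor` (moviepy 1.x, where `write_audiofile` accepts `logger=None`):

```python
import tempfile
import moviepy.editor as mp

def video_to_audio(video_file):
    # NamedTemporaryFile avoids the race window of the deprecated tempfile.mktemp
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tmp.close()
    video = mp.VideoFileClip(video_file)
    try:
        video.audio.write_audiofile(tmp.name, logger=None)  # logger=None silences the console progress bar
    finally:
        video.close()  # release the ffmpeg reader even if extraction fails
    return tmp.name
```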
+# Function to convert MP3 to WAV
 def convert_mp3_to_wav(mp3_file):
+    audio = AudioSegment.from_mp3(mp3_file)
+    temp_wav_path = tempfile.mktemp(suffix=".wav")
+    audio.export(temp_wav_path, format="wav")
+    return temp_wav_path
+
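Note: `AudioSegment.from_mp3` shells out to ffmpeg (or libav), which must be on the PATH; pydub only reads WAV natively. A sketch of a format-agnostic variant (the name `convert_to_wav` and the `fmt` parameter are illustrative, not part of this commit):

```python
import tempfile
from pydub import AudioSegment

def convert_to_wav(media_file, fmt="mp3"):
    # from_file dispatches on the declared format string instead of hard-coding MP3
    audio = AudioSegment.from_file(media_file, format=fmt)
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    audio.export(tmp.name, format="wav")
    return tmp.name
```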
+# Function to transcribe audio with chunking for large files
+def transcribe_audio(audio_file):
+    audio = AudioSegment.from_wav(audio_file)
+    duration = len(audio) / 1000  # Duration in seconds
+    chunk_length = 60  # 60-second chunks
+    recognizer = sr.Recognizer()
+
+    if duration <= chunk_length:
+        with sr.AudioFile(audio_file) as source:
+            audio_data = recognizer.record(source)
+        try:
+            text = recognizer.recognize_google(audio_data)
+            return text
+        except sr.UnknownValueError:
+            return "Audio could not be understood."
+        except sr.RequestError:
+            return "Could not request results from Google Speech Recognition service."
+    else:
+        num_chunks = int(duration // chunk_length) + 1
         transcriptions = []
+        for i in range(num_chunks):
+            start_time = i * chunk_length * 1000  # in milliseconds
+            end_time = min((i + 1) * chunk_length * 1000, len(audio))
+            chunk = audio[start_time:end_time]
+            frame_data = chunk.raw_data
+            sample_rate = audio.frame_rate
+            sample_width = audio.sample_width
+            audio_data = sr.AudioData(frame_data, sample_rate, sample_width)
+            try:
                text = recognizer.recognize_google(audio_data)
                transcriptions.append(text)
+            except sr.UnknownValueError:
+                transcriptions.append("[Audio could not be understood.]")
+            except sr.RequestError:
+                transcriptions.append("[Could not request results.]")
+        return " ".join(transcriptions)
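Two caveats in the chunking path: `int(duration // chunk_length) + 1` produces an empty trailing chunk whenever the duration is an exact multiple of 60 s (it will show up as a spurious "[Audio could not be understood.]"), and building `sr.AudioData` directly from `chunk.raw_data` assumes the raw frames are mono PCM at the stated width. The previous revision avoided the second issue by round-tripping each chunk through a temporary WAV; a sketch of that variant (`transcribe_chunk` is an illustrative helper name, `chunk` a pydub `AudioSegment`):

```python
import os
import tempfile
import speech_recognition as sr

def transcribe_chunk(chunk, recognizer):
    # Export the pydub chunk to a real WAV so channel count, width and rate are unambiguous
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    chunk.export(tmp.name, format="wav")
    try:
        with sr.AudioFile(tmp.name) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except (sr.UnknownValueError, sr.RequestError):
        return "[Audio could not be understood.]"
    finally:
        os.remove(tmp.name)  # always clean up the per-chunk temp file
```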
 
+# Function to detect emotions
 def detect_emotion(text):
+    emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
+    result = emotion_pipeline(text)
+    emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
+    return emotions
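Note: `detect_emotion` now rebuilds the Hugging Face pipeline on every call (a full model load per analysis) and feeds it the whole transcription, which can exceed the model's input limit; the previous revision cached the pipeline with `@st.cache_resource` and chunked long text. A sketch restoring the cache (the shape check is defensive, since recent transformers releases deprecate `return_all_scores=True` in favor of `top_k=None`):

```python
import streamlit as st
from transformers import pipeline

@st.cache_resource  # build the pipeline once per server process, not on every analysis
def load_emotion_model():
    return pipeline("text-classification",
                    model="j-hartmann/emotion-english-distilroberta-base",
                    return_all_scores=True)

def detect_emotion(text):
    result = load_emotion_model()(text[:500])  # naive truncation; chunking (as before) scores long text better
    scores = result[0] if isinstance(result[0], list) else result  # tolerate both output shapes
    return {item["label"]: item["score"] for item in scores}
```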
+
+# Function to plot audio waveform
+def plot_waveform(audio_data, duration=10):
+    audio_data.seek(0)
+    y, sr = librosa.load(audio_data, sr=None, duration=duration)
+    plt.figure(figsize=(10, 4))
+    time = np.linspace(0, len(y)/sr, len(y))
+    plt.plot(time, y)
+    plt.title(f"Audio Waveform (first {duration} seconds)")
+    plt.xlabel("Time (s)")
+    plt.ylabel("Amplitude")
+    st.pyplot(plt)
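Two small footguns in `plot_waveform`: the local `y, sr = librosa.load(...)` shadows the `speech_recognition as sr` module alias (harmless inside this function, but easy to trip over when editing), and `st.pyplot(plt)` hands Streamlit the global pyplot state, which Streamlit has deprecated in favor of an explicit figure. A sketch using an explicit figure that is closed after rendering:

```python
import librosa
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st

def plot_waveform(audio_data, duration=10):
    audio_data.seek(0)
    samples, rate = librosa.load(audio_data, sr=None, duration=duration)  # sr=None keeps the native sample rate
    fig, ax = plt.subplots(figsize=(10, 4))
    t = np.linspace(0, len(samples) / rate, num=len(samples))
    ax.plot(t, samples)
    ax.set(title=f"Audio Waveform (first {duration} seconds)",
           xlabel="Time (s)", ylabel="Amplitude")
    st.pyplot(fig)   # pass the figure explicitly rather than the pyplot module
    plt.close(fig)   # free it so Streamlit reruns don't accumulate figures
```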
 
 # Streamlit app layout
+st.title("Video and Audio to Text Transcription with Emotion Detection and Visualization")
+st.write("Upload a video or audio file to transcribe it, detect emotions, and visualize the audio waveform.")
+st.write("**Note:** To upload files up to 1GB, run the app with: `streamlit run app.py --server.maxUploadSize=1024`")
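The limit can also be set persistently by creating `.streamlit/config.toml` next to the app with a `[server]` section containing `maxUploadSize = 1024` (the value is in MB), which is the configuration route the removed code's comment pointed at.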
 
+tab = st.selectbox("Select file type", ["Video", "Audio"])
 
+if tab == "Video":
+    uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])
+    if uploaded_video:
+        with tempfile.NamedTemporaryFile(delete=False) as tmp_video:
             tmp_video.write(uploaded_video.read())
             tmp_video_path = tmp_video.name
+        if st.button("Analyze Video"):
+            with st.spinner("Processing video..."):
+                audio_file = video_to_audio(tmp_video_path)
+                wav_audio_file = convert_mp3_to_wav(audio_file)
+                transcription = transcribe_audio(wav_audio_file)
+                st.text_area("Transcription", transcription, height=300)
+                emotions = detect_emotion(transcription)
+                st.write(f"Detected Emotions: {emotions}")
+                with open(wav_audio_file, "rb") as f:
+                    audio_data = io.BytesIO(f.read())
+                st.session_state.wav_audio_file = audio_data
+                plot_waveform(st.session_state.wav_audio_file)
+                os.remove(tmp_video_path)
+                os.remove(audio_file)
+                os.remove(wav_audio_file)
+    if 'wav_audio_file' in st.session_state:
+        st.audio(st.session_state.wav_audio_file, format='audio/wav')
+        st.download_button("Download Transcription", st.session_state.transcription, "transcription.txt", "text/plain")
+        st.download_button("Download Audio", st.session_state.wav_audio_file, "converted_audio.wav", "audio/wav")
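Note: the download button reads `st.session_state.transcription`, but the analysis block above only ever stores `wav_audio_file`, so the button appears to raise an `AttributeError` on first use. The missing store, mirroring how the previous revision kept results across reruns, would be one line inside the spinner block:

```python
st.session_state.transcription = transcription  # store alongside wav_audio_file so the download button works
```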
+
+elif tab == "Audio":
+    uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])
+    if uploaded_audio:
         with tempfile.NamedTemporaryFile(delete=False) as tmp_audio:
             tmp_audio.write(uploaded_audio.read())
             tmp_audio_path = tmp_audio.name
+        if st.button("Analyze Audio"):
+            with st.spinner("Processing audio..."):
+                wav_audio_file = convert_mp3_to_wav(tmp_audio_path) if uploaded_audio.type == "audio/mpeg" else tmp_audio_path
+                transcription = transcribe_audio(wav_audio_file)
+                st.text_area("Transcription", transcription, height=300)
+                emotions = detect_emotion(transcription)
+                st.write(f"Detected Emotions: {emotions}")
+                with open(wav_audio_file, "rb") as f:
+                    audio_data = io.BytesIO(f.read())
+                st.session_state.wav_audio_file_audio = audio_data
+                plot_waveform(st.session_state.wav_audio_file_audio)
+                if uploaded_audio.type == "audio/mpeg":
+                    os.remove(wav_audio_file)
+                os.remove(tmp_audio_path)
+    if 'wav_audio_file_audio' in st.session_state:
+        st.audio(st.session_state.wav_audio_file_audio, format='audio/wav')
+        st.download_button("Download Transcription", st.session_state.transcription_audio, "transcription_audio.txt", "text/plain")
+        st.download_button("Download Audio", st.session_state.wav_audio_file_audio, "converted_audio_audio.wav", "audio/wav")
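The same gap repeats in this branch: `st.session_state.transcription_audio` is read by the download button but never assigned. The matching store inside the spinner block would be:

```python
st.session_state.transcription_audio = transcription
```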