shukdevdatta123 commited on
Commit
efa4923
·
verified ·
1 Parent(s): 972546b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -1
app.py CHANGED
@@ -1 +1,200 @@
1
- st.markdown("Hello World")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st #
2
+ import moviepy.editor as mp
3
+ import speech_recognition as sr
4
+ from pydub import AudioSegment
5
+ import tempfile
6
+ import os
7
+ import io
8
+ from transformers import pipeline
9
+ import matplotlib.pyplot as plt
10
+
11
# Function to convert video to audio
def video_to_audio(video_file):
    """Extract the audio track of *video_file* into a temporary MP3 file.

    Args:
        video_file: Filesystem path to a video readable by moviepy.

    Returns:
        Path of a temporary ``.mp3`` file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the video has no audio track (the original code
            crashed with an opaque ``AttributeError`` on ``None``).
    """
    video = mp.VideoFileClip(video_file)
    try:
        if video.audio is None:
            raise ValueError("The uploaded video contains no audio track.")
        # mkstemp creates the file atomically, unlike the deprecated and
        # race-prone tempfile.mktemp the original used.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        video.audio.write_audiofile(temp_audio_path)
    finally:
        # Release the ffmpeg reader handles moviepy holds open; the original
        # never closed the clip and leaked them on every upload.
        video.close()
    return temp_audio_path
23
+
24
# Function to convert MP3 audio to WAV
def convert_mp3_to_wav(mp3_file):
    """Convert an MP3 file to a temporary WAV file.

    Args:
        mp3_file: Filesystem path to an MP3 file.

    Returns:
        Path of a temporary ``.wav`` file. The caller is responsible for
        deleting it.
    """
    # Load the MP3 file using pydub
    audio = AudioSegment.from_mp3(mp3_file)

    # mkstemp creates the file atomically, avoiding the symlink race of the
    # deprecated tempfile.mktemp used previously.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    # Export the audio into the reserved temporary path
    audio.export(temp_wav_path, format="wav")
    return temp_wav_path
35
+
36
# Function to transcribe audio to text
def transcribe_audio(audio_file):
    """Transcribe a WAV file using the Google Web Speech API.

    Args:
        audio_file: Filesystem path to a WAV file.

    Returns:
        The recognized text, or a human-readable error message when the
        audio is unintelligible or the service cannot be reached.
    """
    recognizer = sr.Recognizer()

    # Read the entire file into an AudioData buffer for recognition
    with sr.AudioFile(audio_file) as source:
        recorded = recognizer.record(source)

    try:
        # Send the audio to the Google Web Speech API
        return recognizer.recognize_google(recorded)
    except sr.UnknownValueError:
        return "Audio could not be understood."
    except sr.RequestError:
        return "Could not request results from Google Speech Recognition service."
55
+
56
+ # Function to perform emotion detection using Hugging Face transformers
57
+ def detect_emotion(text):
58
+ # Load emotion detection pipeline
59
+ emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
60
+
61
+ # Get the emotion predictions
62
+ result = emotion_pipeline(text)
63
+
64
+ # Extract the emotion with the highest score
65
+ emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
66
+ return emotions
67
+
68
# Streamlit app layout
st.title("Video and Audio to Text Transcription with Emotion Detection and Visualization")
st.write("Upload a video or audio file to convert it to transcription, detect emotions, and visualize the audio waveform.")

# Create tabs to separate video and audio uploads
tab = st.selectbox("Select the type of file to upload", ["Video", "Audio"])


def _cleanup(*paths):
    # Best-effort removal of temporary files; a failed unlink must not crash
    # the UI after a successful analysis.
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


if tab == "Video":
    # File uploader for video
    uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])

    if uploaded_video is not None:
        # Persist the upload to disk so moviepy can open it by path
        with tempfile.NamedTemporaryFile(delete=False) as tmp_video:
            tmp_video.write(uploaded_video.read())
            tmp_video_path = tmp_video.name

        # Add an "Analyze Video" button
        if st.button("Analyze Video"):
            with st.spinner("Processing video... Please wait."):

                # Convert video to audio, then the extracted MP3 to WAV
                audio_file = video_to_audio(tmp_video_path)
                wav_audio_file = convert_mp3_to_wav(audio_file)

                # Transcribe audio to text and show it
                transcription = transcribe_audio(wav_audio_file)
                st.text_area("Transcription", transcription, height=300)

                # Emotion detection
                emotions = detect_emotion(transcription)
                st.write(f"Detected Emotions: {emotions}")

                # Store transcription in session state so it survives reruns
                st.session_state.transcription = transcription

                # Keep the WAV bytes in memory for playback/download
                with open(wav_audio_file, "rb") as f:
                    st.session_state.wav_audio_file = io.BytesIO(f.read())

                # Remove ALL temporary files. The original leaked
                # wav_audio_file on every analysis (it only deleted the
                # video and the intermediate MP3).
                _cleanup(tmp_video_path, audio_file, wav_audio_file)

    # Check if transcription and audio file are stored in session state
    if 'transcription' in st.session_state and 'wav_audio_file' in st.session_state:
        # Provide the audio file to the user for playback
        st.audio(st.session_state.wav_audio_file, format='audio/wav')

        # Downloadable transcription file
        st.download_button(
            label="Download Transcription",
            data=st.session_state.transcription,
            file_name="transcription.txt",
            mime="text/plain"
        )

        # Downloadable audio file
        st.download_button(
            label="Download Audio",
            data=st.session_state.wav_audio_file,
            file_name="converted_audio.wav",
            mime="audio/wav"
        )

elif tab == "Audio":
    # File uploader for audio
    uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])

    if uploaded_audio is not None:
        # Persist the upload to disk for pydub / speech_recognition
        with tempfile.NamedTemporaryFile(delete=False) as tmp_audio:
            tmp_audio.write(uploaded_audio.read())
            tmp_audio_path = tmp_audio.name

        # Add an "Analyze Audio" button
        if st.button("Analyze Audio"):
            with st.spinner("Processing audio... Please wait."):

                # Convert audio to WAV if it's in MP3 format. Browsers report
                # MP3 uploads as either audio/mpeg or audio/mp3; the original
                # only checked audio/mpeg and fed raw MP3 to the recognizer
                # for the other.
                if uploaded_audio.type in ("audio/mpeg", "audio/mp3"):
                    wav_audio_file = convert_mp3_to_wav(tmp_audio_path)
                else:
                    wav_audio_file = tmp_audio_path

                # Transcribe audio to text and show it
                transcription = transcribe_audio(wav_audio_file)
                st.text_area("Transcription", transcription, height=300)

                # Emotion detection
                emotions = detect_emotion(transcription)
                st.write(f"Detected Emotions: {emotions}")

                # Store transcription in session state
                st.session_state.transcription_audio = transcription

                # Keep the WAV bytes in memory for playback/download
                with open(wav_audio_file, "rb") as f:
                    st.session_state.wav_audio_file_audio = io.BytesIO(f.read())

                # Remove the upload and, when a conversion happened, the
                # converted WAV as well (the original leaked it).
                if wav_audio_file != tmp_audio_path:
                    _cleanup(tmp_audio_path, wav_audio_file)
                else:
                    _cleanup(tmp_audio_path)

    # Check if transcription and audio file are stored in session state
    if 'transcription_audio' in st.session_state and 'wav_audio_file_audio' in st.session_state:
        # Provide the audio file to the user for playback
        st.audio(st.session_state.wav_audio_file_audio, format='audio/wav')

        # Downloadable transcription file
        st.download_button(
            label="Download Transcription",
            data=st.session_state.transcription_audio,
            file_name="transcription_audio.txt",
            mime="text/plain"
        )

        # Downloadable audio file
        st.download_button(
            label="Download Audio",
            data=st.session_state.wav_audio_file_audio,
            file_name="converted_audio_audio.wav",
            mime="audio/wav"
        )