shukdevdatta123 commited on
Commit
878a0a3
·
verified ·
1 Parent(s): 0064167

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -149
app.py CHANGED
@@ -1,189 +1,200 @@
1
- import streamlit as st
2
  import moviepy.editor as mp
3
  import speech_recognition as sr
4
  from pydub import AudioSegment
5
  import tempfile
6
  import os
7
  import io
8
- import requests
9
  from transformers import pipeline
10
  import matplotlib.pyplot as plt
11
- import librosa
12
- import numpy as np
13
-
14
- # Function to download file from URL
15
- def download_file(url):
16
- try:
17
- extension = os.path.splitext(url)[1]
18
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
19
- with requests.get(url, stream=True) as r:
20
- r.raise_for_status()
21
- for chunk in r.iter_content(chunk_size=8192):
22
- temp_file.write(chunk)
23
- temp_file.close()
24
- return temp_file.name
25
- except Exception as e:
26
- st.error(f"Failed to download file: {e}")
27
- return None
28
 
29
  # Function to convert video to audio
30
  def video_to_audio(video_file):
 
31
  video = mp.VideoFileClip(video_file)
 
 
32
  audio = video.audio
33
  temp_audio_path = tempfile.mktemp(suffix=".mp3")
 
 
34
  audio.write_audiofile(temp_audio_path)
35
  return temp_audio_path
36
 
37
- # Function to convert MP3 to WAV
38
  def convert_mp3_to_wav(mp3_file):
 
39
  audio = AudioSegment.from_mp3(mp3_file)
 
 
40
  temp_wav_path = tempfile.mktemp(suffix=".wav")
 
 
41
  audio.export(temp_wav_path, format="wav")
42
  return temp_wav_path
43
 
44
- # Function to transcribe audio with chunking for large files
45
  def transcribe_audio(audio_file):
46
- audio = AudioSegment.from_wav(audio_file)
47
- duration = len(audio) / 1000 # Duration in seconds
48
- chunk_length = 60 # 60-second chunks
49
  recognizer = sr.Recognizer()
50
 
51
- if duration <= chunk_length:
52
- with sr.AudioFile(audio_file) as source:
53
- audio_data = recognizer.record(source)
54
- try:
55
- text = recognizer.recognize_google(audio_data)
56
- return text
57
- except sr.UnknownValueError:
58
- return "Audio could not be understood."
59
- except sr.RequestError:
60
- return "Could not request results from Google Speech Recognition service."
61
- else:
62
- num_chunks = int(duration // chunk_length) + 1
63
- transcriptions = []
64
- for i in range(num_chunks):
65
- start_time = i * chunk_length * 1000 # in milliseconds
66
- end_time = min((i + 1) * chunk_length * 1000, len(audio))
67
- chunk = audio[start_time:end_time]
68
- frame_data = chunk.raw_data
69
- sample_rate = audio.frame_rate
70
- sample_width = audio.sample_width
71
- audio_data = sr.AudioData(frame_data, sample_rate, sample_width)
72
- try:
73
- text = recognizer.recognize_google(audio_data)
74
- transcriptions.append(text)
75
- except sr.UnknownValueError:
76
- transcriptions.append("[Audio could not be understood.]")
77
- except sr.RequestError:
78
- transcriptions.append("[Could not request results.]")
79
- return " ".join(transcriptions)
80
-
81
- # Function to detect emotions
82
  def detect_emotion(text):
 
83
  emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
 
 
84
  result = emotion_pipeline(text)
 
 
85
  emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
86
  return emotions
87
 
88
- # Function to plot audio waveform
89
- def plot_waveform(audio_data, duration=10):
90
- audio_data.seek(0)
91
- y, sr = librosa.load(audio_data, sr=None, duration=duration)
92
- plt.figure(figsize=(10, 4))
93
- time = np.linspace(0, len(y)/sr, len(y))
94
- plt.plot(time, y)
95
- plt.title(f"Audio Waveform (first {duration} seconds)")
96
- plt.xlabel("Time (s)")
97
- plt.ylabel("Amplitude")
98
- st.pyplot(plt)
99
-
100
  # Streamlit app layout
101
  st.title("Video and Audio to Text Transcription with Emotion Detection and Visualization")
102
- st.write("Upload a video or audio file, or provide a URL to a large file (up to 1GB).")
103
- st.write("**Note:** Direct file uploads are limited to 200MB. For larger files, please provide a URL.")
104
 
105
- tab = st.selectbox("Select file type", ["Video", "Audio"])
 
106
 
107
  if tab == "Video":
108
- method = st.radio("Choose how to provide the video file:", ["Upload file", "Provide URL"])
109
- if method == "Upload file":
110
- uploaded_file = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])
111
- elif method == "Provide URL":
112
- url = st.text_input("Enter video URL")
113
- if st.button("Analyze Video"):
114
- if method == "Upload file" and uploaded_file:
115
- with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
116
- tmp_file.write(uploaded_file.read())
117
- file_path = tmp_file.name
118
- elif method == "Provide URL" and url:
119
- with st.spinner("Downloading video... This may take a while for large files."):
120
- file_path = download_file(url)
121
- if file_path is None:
122
- st.error("Failed to download the file. Please check the URL and try again.")
123
- st.stop()
124
- else:
125
- st.error("Please provide a file or URL.")
126
- st.stop()
127
- # Process the video file
128
- with st.spinner("Processing video..."):
129
- audio_file = video_to_audio(file_path)
130
- wav_audio_file = convert_mp3_to_wav(audio_file)
131
- transcription = transcribe_audio(wav_audio_file)
132
- st.text_area("Transcription", transcription, height=300)
133
- emotions = detect_emotion(transcription)
134
- st.write(f"Detected Emotions: {emotions}")
135
- with open(wav_audio_file, "rb") as f:
136
- audio_data = io.BytesIO(f.read())
137
- st.session_state.wav_audio_file = audio_data
138
- plot_waveform(st.session_state.wav_audio_file)
139
- # Cleanup
140
- os.remove(file_path)
141
- os.remove(audio_file)
142
- os.remove(wav_audio_file)
143
- if 'wav_audio_file' in st.session_state:
 
 
 
 
 
 
 
 
144
  st.audio(st.session_state.wav_audio_file, format='audio/wav')
145
- st.download_button("Download Transcription", transcription, "transcription.txt", "text/plain")
146
- st.download_button("Download Audio", st.session_state.wav_audio_file, "converted_audio.wav", "audio/wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  elif tab == "Audio":
149
- method = st.radio("Choose how to provide the audio file:", ["Upload file", "Provide URL"])
150
- if method == "Upload file":
151
- uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])
152
- elif method == "Provide URL":
153
- url = st.text_input("Enter audio URL")
154
- if st.button("Analyze Audio"):
155
- if method == "Upload file" and uploaded_file:
156
- with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3' if uploaded_file.type == "audio/mpeg" else '.wav') as tmp_file:
157
- tmp_file.write(uploaded_file.read())
158
- file_path = tmp_file.name
159
- elif method == "Provide URL" and url:
160
- with st.spinner("Downloading audio... This may take a while for large files."):
161
- file_path = download_file(url)
162
- if file_path is None:
163
- st.error("Failed to download the file. Please check the URL and try again.")
164
- st.stop()
165
- else:
166
- st.error("Please provide a file or URL.")
167
- st.stop()
168
- # Process the audio file
169
- with st.spinner("Processing audio..."):
170
- if file_path.endswith('.mp3'):
171
- wav_audio_file = convert_mp3_to_wav(file_path)
172
- else:
173
- wav_audio_file = file_path
174
- transcription = transcribe_audio(wav_audio_file)
175
- st.text_area("Transcription", transcription, height=300)
176
- emotions = detect_emotion(transcription)
177
- st.write(f"Detected Emotions: {emotions}")
178
- with open(wav_audio_file, "rb") as f:
179
- audio_data = io.BytesIO(f.read())
180
- st.session_state.wav_audio_file_audio = audio_data
181
- plot_waveform(st.session_state.wav_audio_file_audio)
182
- # Cleanup
183
- if file_path != wav_audio_file:
184
- os.remove(file_path)
185
- os.remove(wav_audio_file)
186
- if 'wav_audio_file_audio' in st.session_state:
187
- st.audio(st.session_state.wav_audio_file_audio, format='audio/wav')
188
- st.download_button("Download Transcription", transcription, "transcription_audio.txt", "text/plain")
189
- st.download_button("Download Audio", st.session_state.wav_audio_file_audio, "converted_audio_audio.wav", "audio/wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st #
2
  import moviepy.editor as mp
3
  import speech_recognition as sr
4
  from pydub import AudioSegment
5
  import tempfile
6
  import os
7
  import io
 
8
  from transformers import pipeline
9
  import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
# Function to convert video to audio
def video_to_audio(video_file):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_file: Path to a video file on disk.

    Returns:
        Path to a temporary ``.mp3`` file containing the extracted audio.
        The caller is responsible for deleting it.
    """
    # Load the video using moviepy
    video = mp.VideoFileClip(video_file)
    try:
        # mkstemp creates the file securely; the deprecated tempfile.mktemp
        # used before is race-prone (name can be claimed between calls).
        fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # moviepy writes by path, so release our descriptor

        # Write the extracted audio to the temporary file
        video.audio.write_audiofile(temp_audio_path)
    finally:
        # Release the ffmpeg reader processes held by moviepy
        video.close()
    return temp_audio_path
23
 
24
# Function to convert MP3 audio to WAV
def convert_mp3_to_wav(mp3_file):
    """Convert an MP3 file to a temporary WAV file.

    Args:
        mp3_file: Path to an MP3 file on disk.

    Returns:
        Path to a temporary ``.wav`` file. The caller is responsible
        for deleting it.
    """
    # Load the MP3 file using pydub
    audio = AudioSegment.from_mp3(mp3_file)

    # mkstemp creates the file securely; the deprecated tempfile.mktemp
    # used before is race-prone (name can be claimed between calls).
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pydub exports by path, so release our descriptor

    # Export the audio to the temporary WAV file
    audio.export(temp_wav_path, format="wav")
    return temp_wav_path
35
 
36
# Function to transcribe audio to text
def transcribe_audio(audio_file):
    """Transcribe a WAV file using the Google Web Speech API.

    Recordings longer than 60 seconds are split into 60-second chunks
    because the free Google Web Speech endpoint rejects very long audio
    payloads; transcribing in one shot fails for large files.

    Args:
        audio_file: Path to a ``.wav`` file on disk.

    Returns:
        The transcription text. On recognition/network failure, an
        explanatory message (whole-file case) or a bracketed placeholder
        for the failing chunk (chunked case).
    """
    # Initialize recognizer
    recognizer = sr.Recognizer()

    audio = AudioSegment.from_wav(audio_file)
    chunk_ms = 60 * 1000  # 60-second chunks keep each request small enough

    # Short recordings: transcribe the whole file in one request.
    if len(audio) <= chunk_ms:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
        try:
            # Transcribe the audio data to text using Google Web Speech API
            return recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            return "Audio could not be understood."
        except sr.RequestError:
            return "Could not request results from Google Speech Recognition service."

    # Long recordings: transcribe chunk by chunk and join the results.
    transcriptions = []
    for start in range(0, len(audio), chunk_ms):
        chunk = audio[start:start + chunk_ms]
        if len(chunk) == 0:
            # Guard against an empty trailing chunk (exact-multiple durations)
            continue
        audio_data = sr.AudioData(chunk.raw_data, audio.frame_rate, audio.sample_width)
        try:
            transcriptions.append(recognizer.recognize_google(audio_data))
        except sr.UnknownValueError:
            transcriptions.append("[Audio could not be understood.]")
        except sr.RequestError:
            transcriptions.append("[Could not request results.]")
    return " ".join(transcriptions)
55
+
56
# Function to perform emotion detection using Hugging Face transformers
def detect_emotion(text):
    """Score *text* against the model's fixed set of emotion labels.

    The Hugging Face pipeline is loaded once and memoised on the function
    object: re-creating it on every call (as before) re-initialises the
    model weights on every Streamlit rerun, which is very slow.

    Args:
        text: The text to classify.

    Returns:
        Dict mapping emotion label -> confidence score.
    """
    # Load the emotion detection pipeline once and cache it
    pipe = getattr(detect_emotion, "_pipeline", None)
    if pipe is None:
        pipe = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            return_all_scores=True,
        )
        detect_emotion._pipeline = pipe

    # Get the emotion predictions
    result = pipe(text)

    # result[0] holds the per-label scores for the single input string
    return {emotion['label']: emotion['score'] for emotion in result[0]}
67
 
 
 
 
 
 
 
 
 
 
 
 
 
68
# Streamlit app layout
st.title("Video and Audio to Text Transcription with Emotion Detection and Visualization")
# NOTE: the waveform-visualization feature (librosa plot) was removed in this
# revision, so the description no longer promises it.
st.write("Upload a video or audio file to convert it to transcription and detect emotions.")

# Create tabs to separate video and audio uploads
tab = st.selectbox("Select the type of file to upload", ["Video", "Audio"])

if tab == "Video":
    # File uploader for video
    uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])

    if uploaded_video is not None:
        # Keep the original extension so ffmpeg (via moviepy) can identify
        # the container format; previously the temp file had no suffix.
        video_suffix = os.path.splitext(uploaded_video.name)[1] or ".mp4"
        with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp_video:
            tmp_video.write(uploaded_video.read())
            tmp_video_path = tmp_video.name

        # Add an "Analyze Video" button
        if st.button("Analyze Video"):
            with st.spinner("Processing video... Please wait."):

                # Video -> MP3 -> WAV -> text
                audio_file = video_to_audio(tmp_video_path)
                wav_audio_file = convert_mp3_to_wav(audio_file)
                transcription = transcribe_audio(wav_audio_file)

                # Show the transcription
                st.text_area("Transcription", transcription, height=300)

                # Emotion detection
                emotions = detect_emotion(transcription)
                st.write(f"Detected Emotions: {emotions}")

                # Persist results across Streamlit reruns
                st.session_state.transcription = transcription
                with open(wav_audio_file, "rb") as f:
                    st.session_state.wav_audio_file = io.BytesIO(f.read())

                # Cleanup ALL temporary files; the previous code leaked the
                # converted WAV (only the video and MP3 were removed).
                os.remove(tmp_video_path)
                os.remove(audio_file)
                os.remove(wav_audio_file)

    # Offer playback/downloads once results exist in session state
    if 'transcription' in st.session_state and 'wav_audio_file' in st.session_state:
        st.audio(st.session_state.wav_audio_file, format='audio/wav')

        # Downloadable transcription file
        st.download_button(
            label="Download Transcription",
            data=st.session_state.transcription,
            file_name="transcription.txt",
            mime="text/plain"
        )

        # Downloadable audio file
        st.download_button(
            label="Download Audio",
            data=st.session_state.wav_audio_file,
            file_name="converted_audio.wav",
            mime="audio/wav"
        )

elif tab == "Audio":
    # File uploader for audio
    uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])

    if uploaded_audio is not None:
        # Give the temp file the right extension for downstream readers;
        # previously the temp file had no suffix.
        audio_suffix = ".mp3" if uploaded_audio.type == "audio/mpeg" else ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=audio_suffix) as tmp_audio:
            tmp_audio.write(uploaded_audio.read())
            tmp_audio_path = tmp_audio.name

        # Add an "Analyze Audio" button
        if st.button("Analyze Audio"):
            with st.spinner("Processing audio... Please wait."):

                # Convert audio to WAV if it's in MP3 format
                if uploaded_audio.type == "audio/mpeg":
                    wav_audio_file = convert_mp3_to_wav(tmp_audio_path)
                else:
                    wav_audio_file = tmp_audio_path

                # Transcribe audio to text
                transcription = transcribe_audio(wav_audio_file)

                # Show the transcription
                st.text_area("Transcription", transcription, height=300)

                # Emotion detection
                emotions = detect_emotion(transcription)
                st.write(f"Detected Emotions: {emotions}")

                # Persist results across Streamlit reruns
                st.session_state.transcription_audio = transcription
                with open(wav_audio_file, "rb") as f:
                    st.session_state.wav_audio_file_audio = io.BytesIO(f.read())

                # Cleanup: remove the converted WAV too (it was leaked before),
                # then the uploaded temp file.
                if wav_audio_file != tmp_audio_path:
                    os.remove(wav_audio_file)
                os.remove(tmp_audio_path)

    # Offer playback/downloads once results exist in session state
    if 'transcription_audio' in st.session_state and 'wav_audio_file_audio' in st.session_state:
        st.audio(st.session_state.wav_audio_file_audio, format='audio/wav')

        # Downloadable transcription file
        st.download_button(
            label="Download Transcription",
            data=st.session_state.transcription_audio,
            file_name="transcription_audio.txt",
            mime="text/plain"
        )

        # Downloadable audio file
        st.download_button(
            label="Download Audio",
            data=st.session_state.wav_audio_file_audio,
            file_name="converted_audio_audio.wav",
            mime="audio/wav"
        )