shukdevdatta123 committed on
Commit
e50410b
·
verified ·
1 Parent(s): 98851f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -4
app.py CHANGED
@@ -5,6 +5,8 @@ from pydub import AudioSegment
5
  import tempfile
6
  import os
7
  import io
 
 
8
 
9
  # Function to convert video to audio
10
  def video_to_audio(video_file):
@@ -51,12 +53,32 @@ def transcribe_audio(audio_file):
51
  except sr.RequestError:
52
  return "Could not request results from Google Speech Recognition service."
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  # Streamlit app layout
55
- st.title("Video and Audio to Text Transcription")
56
- st.write("Upload a video or audio file to convert it to transcription.")
57
 
58
- # Create tabs to separate video and audio uploads
59
- tab = st.selectbox("Select the type of file to upload", ["Video", "Audio"])
60
 
61
  if tab == "Video":
62
  # File uploader for video
@@ -173,4 +195,58 @@ elif tab == "Audio":
173
  data=st.session_state.wav_audio_file_audio,
174
  file_name="converted_audio_audio.wav",
175
  mime="audio/wav"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  )
 
5
  import tempfile
6
  import os
7
  import io
8
+ from pytube import YouTube
9
+ import requests
10
 
11
  # Function to convert video to audio
12
  def video_to_audio(video_file):
 
53
  except sr.RequestError:
54
  return "Could not request results from Google Speech Recognition service."
55
 
56
# Function to download audio from YouTube and convert it to WAV
def download_youtube_audio(url):
    """Download the audio track of a YouTube video and convert it to WAV.

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        Whatever ``convert_mp3_to_wav`` returns for the downloaded file.
        The caller in this script treats it as the path of a WAV file that
        it reads and then deletes.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)

    # Take the first audio-only stream. first() yields None when the filter
    # matches nothing, so fail with a clear message instead of an
    # AttributeError on the download call below.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this YouTube video.")

    # pytube treats output_path as a DIRECTORY and filename as the file name
    # inside it, so download into a private temporary directory rather than
    # passing a file path produced by the deprecated tempfile.mktemp().
    download_dir = tempfile.mkdtemp()
    mp4_path = os.path.join(download_dir, "audio.mp4")
    try:
        # download() returns the path it actually wrote; prefer that value.
        mp4_path = audio_stream.download(output_path=download_dir, filename="audio.mp4")

        # Convert the downloaded MP4 audio to WAV format
        return convert_mp3_to_wav(mp4_path)
    finally:
        # Always remove the intermediate download, even when conversion fails.
        if os.path.exists(mp4_path):
            os.remove(mp4_path)
        try:
            os.rmdir(download_dir)
        except OSError:
            # Directory is not empty (e.g. the converter wrote its output
            # next to the input); leave it so the returned file stays valid.
            pass
75
+
76
  # Streamlit app layout
77
+ st.title("Video, Audio, and YouTube to Text Transcription")
78
+ st.write("Upload a video, audio file, or provide a YouTube URL to convert it to transcription.")
79
 
80
+ # Create tabs to separate video, audio, and YouTube URL uploads
81
+ tab = st.selectbox("Select the type of file to upload", ["Video", "Audio", "YouTube URL"])
82
 
83
  if tab == "Video":
84
  # File uploader for video
 
195
  data=st.session_state.wav_audio_file_audio,
196
  file_name="converted_audio_audio.wav",
197
  mime="audio/wav"
198
+ )
199
+
200
+ elif tab == "YouTube URL":
201
+ # Input for YouTube URL
202
+ youtube_url = st.text_input("Enter YouTube URL")
203
+
204
+ if youtube_url:
205
+ # Add an "Analyze YouTube URL" button
206
+ if st.button("Analyze YouTube URL"):
207
+ with st.spinner("Processing YouTube video... Please wait."):
208
+ try:
209
+ # Download audio from the YouTube video
210
+ wav_audio_file = download_youtube_audio(youtube_url)
211
+
212
+ # Transcribe audio to text
213
+ transcription = transcribe_audio(wav_audio_file)
214
+
215
+ # Show the transcription
216
+ st.text_area("Transcription", transcription, height=300)
217
+
218
+ # Store transcription and audio file in session state
219
+ st.session_state.transcription_youtube = transcription
220
+
221
+ # Store the audio file as a BytesIO object in memory
222
+ with open(wav_audio_file, "rb") as f:
223
+ audio_data = f.read()
224
+ st.session_state.wav_audio_file_youtube = io.BytesIO(audio_data)
225
+
226
+ # Cleanup the temporary audio file
227
+ os.remove(wav_audio_file)
228
+
229
+ except Exception as e:
230
+ st.error(f"Error processing the YouTube URL: {e}")
231
+
232
+ # Check if transcription and audio file are stored in session state
233
+ if 'transcription_youtube' in st.session_state and 'wav_audio_file_youtube' in st.session_state:
234
+ # Provide the audio file to the user for download
235
+ st.audio(st.session_state.wav_audio_file_youtube, format='audio/wav')
236
+
237
+ # Add download buttons for the transcription and audio
238
+ # Downloadable transcription file
239
+ st.download_button(
240
+ label="Download Transcription",
241
+ data=st.session_state.transcription_youtube,
242
+ file_name="transcription_youtube.txt",
243
+ mime="text/plain"
244
+ )
245
+
246
+ # Downloadable audio file
247
+ st.download_button(
248
+ label="Download Audio",
249
+ data=st.session_state.wav_audio_file_youtube,
250
+ file_name="converted_audio_youtube.wav",
251
+ mime="audio/wav"
252
  )