File size: 9,754 Bytes
70b9e48
 
 
 
 
 
98851f3
e50410b
 
70b9e48
 
 
 
 
 
 
 
 
 
 
 
 
 
82467b2
 
 
 
 
 
 
 
 
 
 
 
70b9e48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e50410b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70b9e48
e50410b
 
575bce9
e50410b
 
575bce9
3e2d79c
 
 
575bce9
3e2d79c
 
 
 
 
8974aa3
3e2d79c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98851f3
 
 
 
 
3e2d79c
 
 
 
 
 
 
 
98851f3
3e2d79c
 
 
8974aa3
3e2d79c
 
 
 
 
 
 
98851f3
 
 
 
 
 
3e2d79c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98851f3
 
 
 
 
3e2d79c
 
 
 
 
 
 
98851f3
3e2d79c
 
 
 
 
 
 
 
 
 
 
98851f3
 
 
 
 
e50410b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98851f3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
import streamlit as st
import moviepy.editor as mp
import speech_recognition as sr
from pydub import AudioSegment
import tempfile
import os
import io
from pytube import YouTube
import requests

# Function to convert video to audio
def video_to_audio(video_file):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_file: Path to a video file that moviepy can open.

    Returns:
        Path to a newly created temporary ``.mp3`` file. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(video_file)
    try:
        audio = video.audio
        if audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("The video does not contain an audio track.")

        # mkstemp creates the file atomically; mktemp only returns a name
        # that another process could claim before it is written.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        try:
            audio.write_audiofile(temp_audio_path)
        except Exception:
            # Do not leave a partial/empty temp file behind on failure.
            os.remove(temp_audio_path)
            raise
        return temp_audio_path
    finally:
        # Release the reader and file handle held by the clip.
        video.close()

# Function to convert MP3 audio to WAV
def convert_mp3_to_wav(mp3_file):
    """Convert an MP3 file to a temporary WAV file.

    Args:
        mp3_file: Path to the MP3 file to decode with pydub.

    Returns:
        Path to a newly created temporary ``.wav`` file. The caller is
        responsible for deleting it.
    """
    # Decode first so a bad input does not leave a temp file behind.
    audio = AudioSegment.from_mp3(mp3_file)

    # mkstemp creates the file atomically; mktemp only returns a name
    # that another process could claim before it is written.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio.export(temp_wav_path, format="wav")
    except Exception:
        # Remove the placeholder file if the export did not succeed.
        os.remove(temp_wav_path)
        raise
    return temp_wav_path

# Function to transcribe audio to text
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the Google Web Speech API.

    Args:
        audio_file: Audio file to open with ``sr.AudioFile``.

    Returns:
        The recognized text, or a human-readable message string when the
        speech could not be understood or the recognition request failed.
    """
    speech_recognizer = sr.Recognizer()

    # Capture the audio from the opened file source
    with sr.AudioFile(audio_file) as wav_source:
        captured_audio = speech_recognizer.record(wav_source)

    # Recognition failures are reported as message strings, not exceptions
    try:
        transcript = speech_recognizer.recognize_google(captured_audio)
    except sr.UnknownValueError:
        return "Audio could not be understood."
    except sr.RequestError:
        return "Could not request results from Google Speech Recognition service."
    return transcript

# Function to download audio from YouTube and convert it to WAV
def download_youtube_audio(url):
    """Download a YouTube video's audio track and convert it to a WAV file.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path to a newly created temporary ``.wav`` file. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)

    # first() picks the first audio-only stream in listing order; it is not
    # guaranteed to be the highest bitrate.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    # pytube treats ``output_path`` as a directory, so download into a
    # throwaway directory and use the file path that download() returns.
    with tempfile.TemporaryDirectory() as download_dir:
        downloaded_path = audio_stream.download(output_path=download_dir)
        # The download is an MP4/WebM container, not an MP3, so let ffmpeg
        # detect the format. The decoded audio is held in memory, which
        # allows the download directory to be removed right away.
        audio = AudioSegment.from_file(downloaded_path)

    fd, wav_audio_file = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio.export(wav_audio_file, format="wav")
    except Exception:
        # Do not leave a partial/empty temp file behind on failure.
        os.remove(wav_audio_file)
        raise
    return wav_audio_file

# Streamlit app layout
#
# Streamlit re-executes this script on every widget interaction, so temporary
# files are created only while an "Analyze" click is being handled and are
# always removed afterwards. Results live in st.session_state so they survive
# the reruns triggered by the download buttons.


def _save_upload_to_temp(uploaded_file):
    """Write an uploaded file's bytes to a named temp file and return its path."""
    # Keep the original extension so downstream tools can identify the format.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        # getvalue() returns the full content regardless of the read position.
        tmp_file.write(uploaded_file.getvalue())
        return tmp_file.name


def _remove_quietly(*paths):
    """Best-effort deletion of temp files; unset or already-removed paths are skipped."""
    for path in {p for p in paths if p}:
        try:
            os.remove(path)
        except OSError:
            # Cleanup must never mask the real processing result or error.
            pass


def _store_results(wav_path, transcription, transcription_key, audio_key):
    """Save the transcription and the WAV bytes into session state."""
    st.session_state[transcription_key] = transcription
    with open(wav_path, "rb") as wav_file:
        st.session_state[audio_key] = io.BytesIO(wav_file.read())


def _render_results(transcription_key, audio_key, transcription_name, audio_name):
    """Show the stored transcription, audio player, and download buttons, if any."""
    if transcription_key not in st.session_state or audio_key not in st.session_state:
        return

    # Rendered from session state so the text stays visible across reruns.
    st.text_area("Transcription", st.session_state[transcription_key], height=300)
    st.audio(st.session_state[audio_key], format='audio/wav')

    st.download_button(
        label="Download Transcription",
        data=st.session_state[transcription_key],
        file_name=transcription_name,
        mime="text/plain"
    )
    st.download_button(
        label="Download Audio",
        data=st.session_state[audio_key],
        file_name=audio_name,
        mime="audio/wav"
    )


st.title("Video, Audio, and YouTube to Text Transcription")
st.write("Upload a video, audio file, or provide a YouTube URL to convert it to transcription.")

# Selector that switches between video, audio, and YouTube URL inputs
tab = st.selectbox("Select the type of file to upload", ["Video", "Audio", "YouTube URL"])

if tab == "Video":
    uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])

    # The button is only shown once a file has been uploaded
    if uploaded_video is not None and st.button("Analyze Video"):
        with st.spinner("Processing video... Please wait."):
            tmp_video_path = audio_file = wav_audio_file = None
            try:
                tmp_video_path = _save_upload_to_temp(uploaded_video)
                # video -> MP3 -> WAV -> text
                audio_file = video_to_audio(tmp_video_path)
                wav_audio_file = convert_mp3_to_wav(audio_file)
                transcription = transcribe_audio(wav_audio_file)
                _store_results(wav_audio_file, transcription,
                               'transcription', 'wav_audio_file')
            except Exception as e:
                st.error(f"Error processing the video: {e}")
            finally:
                _remove_quietly(tmp_video_path, audio_file, wav_audio_file)

    _render_results('transcription', 'wav_audio_file',
                    "transcription.txt", "converted_audio.wav")

elif tab == "Audio":
    uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])

    if uploaded_audio is not None:
        if st.button("Analyze Audio"):
            with st.spinner("Processing audio... Please wait."):
                tmp_audio_path = wav_audio_file = None
                try:
                    tmp_audio_path = _save_upload_to_temp(uploaded_audio)

                    # Browsers do not all report the same MIME type for MP3,
                    # so also accept the file extension.
                    is_mp3 = (
                        uploaded_audio.type == "audio/mpeg"
                        or uploaded_audio.name.lower().endswith(".mp3")
                    )
                    if is_mp3:
                        wav_audio_file = convert_mp3_to_wav(tmp_audio_path)
                    else:
                        wav_audio_file = tmp_audio_path

                    transcription = transcribe_audio(wav_audio_file)
                    _store_results(wav_audio_file, transcription,
                                   'transcription_audio', 'wav_audio_file_audio')
                except Exception as e:
                    st.error(f"Error processing the audio: {e}")
                finally:
                    _remove_quietly(tmp_audio_path, wav_audio_file)

        _render_results('transcription_audio', 'wav_audio_file_audio',
                        "transcription_audio.txt", "converted_audio_audio.wav")

elif tab == "YouTube URL":
    youtube_url = st.text_input("Enter YouTube URL")

    if youtube_url:
        if st.button("Analyze YouTube URL"):
            with st.spinner("Processing YouTube video... Please wait."):
                wav_audio_file = None
                try:
                    wav_audio_file = download_youtube_audio(youtube_url)
                    transcription = transcribe_audio(wav_audio_file)
                    _store_results(wav_audio_file, transcription,
                                   'transcription_youtube', 'wav_audio_file_youtube')
                except Exception as e:
                    st.error(f"Error processing the YouTube URL: {e}")
                finally:
                    _remove_quietly(wav_audio_file)

        _render_results('transcription_youtube', 'wav_audio_file_youtube',
                        "transcription_youtube.txt", "converted_audio_youtube.wav")