"""Streamlit app: upload a video, extract its audio track, and transcribe it."""

import os
import tempfile

import moviepy.editor as mp
import speech_recognition as sr
import streamlit as st
from pydub import AudioSegment


def _remove_quietly(path):
    """Delete *path*, ignoring the error if it is already gone or still locked."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup of a temp file; nothing useful to do on failure.
        pass


def video_to_audio(video_file):
    """Extract the audio track of a video into a temporary WAV file.

    Args:
        video_file: Path to the video file to read.

    Returns:
        Path to a newly created temporary ``.wav`` file. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(video_file)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError("The video does not contain an audio track.")

        # mkstemp creates the file atomically (mktemp is deprecated and racy).
        # WAV is used because speech_recognition's AudioFile cannot read MP3.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            audio.write_audiofile(temp_audio_path)
        except Exception:
            # Do not leave a half-written temp file behind on failure.
            _remove_quietly(temp_audio_path)
            raise
        return temp_audio_path
    finally:
        # Release the ffmpeg readers so the source file can be deleted later.
        video.close()


def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the Google Web Speech API.

    Args:
        audio_file: Path to an audio file that ``sr.AudioFile`` can open.

    Returns:
        The recognized text, or a human-readable message when the speech
        could not be understood or the recognition service request failed.
    """
    recognizer = sr.Recognizer()

    # Read the whole file into memory as a single AudioData object.
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Audio could not be understood."
    except sr.RequestError:
        return "Could not request results from Google Speech Recognition service."


# Streamlit app layout
st.title("Video to Audio to Text Transcription")
st.write("Upload a video file, and it will be converted to audio and transcribed into text.")

# File uploader for video
uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])

if uploaded_video is not None:
    tmp_video_path = None
    audio_file = None
    try:
        # Keep the original extension so the container format is easy to detect.
        video_suffix = os.path.splitext(uploaded_video.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp_video:
            # Record the path first so cleanup still runs if the write fails.
            tmp_video_path = tmp_video.name
            tmp_video.write(uploaded_video.read())

        st.write("Converting video to audio...")
        try:
            audio_file = video_to_audio(tmp_video_path)
        except ValueError as exc:
            # Raised when the uploaded video has no audio track.
            st.error(str(exc))
        else:
            # Hand Streamlit the bytes rather than the path, because the
            # temporary file is deleted at the end of this run.
            with open(audio_file, "rb") as audio_stream:
                st.audio(audio_stream.read(), format="audio/wav")

            st.write("Transcribing audio to text...")
            transcription = transcribe_audio(audio_file)

            st.text_area("Transcription", transcription, height=300)
    finally:
        # Always remove temporary files, even when a step above raised.
        for temp_path in (tmp_video_path, audio_file):
            if temp_path:
                _remove_quietly(temp_path)