File size: 2,757 Bytes
70b9e48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82467b2
 
 
 
 
 
 
 
 
 
 
 
70b9e48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82467b2
 
 
 
70b9e48
82467b2
70b9e48
 
 
82467b2
70b9e48
 
 
 
 
 
 
82467b2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
import moviepy.editor as mp
import speech_recognition as sr
from pydub import AudioSegment
import tempfile
import os

# Function to convert video to audio
def video_to_audio(video_file):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_file: Path of the video file to read.

    Returns:
        Path of the newly written MP3 file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If the video contains no audio track.
    """
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before we write to it.
    fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)

    try:
        video = mp.VideoFileClip(video_file)
        try:
            audio = video.audio
            if audio is None:
                raise ValueError("The video has no audio track to extract.")
            audio.write_audiofile(temp_audio_path)
        finally:
            # Release the reader so the input file can be deleted afterwards.
            video.close()
    except BaseException:
        # Do not leave an empty or partial MP3 behind when extraction fails.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass
        raise

    return temp_audio_path

# Function to convert MP3 audio to WAV
def convert_mp3_to_wav(mp3_file):
    """Convert an MP3 file into a temporary WAV file.

    Args:
        mp3_file: Path of the MP3 file to read.

    Returns:
        Path of the newly written WAV file. The caller owns the file and is
        responsible for deleting it.
    """
    audio = AudioSegment.from_mp3(mp3_file)

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before we write to it.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        # export() hands back the output file handle; close it right away so
        # the file is fully flushed and can be removed later on any platform.
        audio.export(temp_wav_path, format="wav").close()
    except BaseException:
        # Do not leave an empty or partial WAV behind when the export fails.
        try:
            os.remove(temp_wav_path)
        except OSError:
            pass
        raise

    return temp_wav_path

# Function to transcribe audio to text
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the Google Web Speech API.

    Args:
        audio_file: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        The recognized text, or a human-readable message when the audio
        could not be understood or the recognition request failed.
    """
    engine = sr.Recognizer()

    # Capture the audio from the file before sending it for recognition.
    with sr.AudioFile(audio_file) as clip:
        captured = engine.record(clip)

    try:
        return engine.recognize_google(captured)
    except sr.UnknownValueError:
        return "Audio could not be understood."
    except sr.RequestError:
        return "Could not request results from Google Speech Recognition service."

# Streamlit app layout
st.title("Video to Audio to Text Transcription")
st.write("Upload a video file, and it will be converted to audio and transcribed into text.")

# File uploader for video
uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])

if uploaded_video is not None:
    # Every temporary file created below is registered here so that the
    # cleanup in `finally` runs even when a conversion step raises.
    temp_paths = []
    try:
        # Keep the upload's extension on the temporary copy so the container
        # format can still be recognized from the file name.
        video_suffix = os.path.splitext(uploaded_video.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp_video:
            temp_paths.append(tmp_video.name)
            tmp_video.write(uploaded_video.read())
        tmp_video_path = tmp_video.name

        # Convert video to audio
        st.write("Converting video to audio...")
        audio_file = video_to_audio(tmp_video_path)
        temp_paths.append(audio_file)

        # Convert the extracted MP3 audio to WAV
        st.write("Converting audio to WAV...")
        wav_audio_file = convert_mp3_to_wav(audio_file)
        temp_paths.append(wav_audio_file)

        # Embed an audio player for the extracted audio
        st.audio(wav_audio_file, format='audio/wav')

        # Transcribe audio to text
        st.write("Transcribing audio to text...")
        transcription = transcribe_audio(wav_audio_file)

        # Show the transcription
        st.text_area("Transcription", transcription, height=300)
    finally:
        # Cleanup temporary files; a failed removal must not mask an error
        # raised by the processing steps above, so it is only reported.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError:
                st.warning("Could not remove a temporary file.")