File size: 4,123 Bytes
f89c8ce
 
 
04676e1
f89c8ce
 
 
8eab835
f89c8ce
990b7d7
04676e1
157a5b0
4ff1681
 
f89c8ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04676e1
 
 
4ff1681
04676e1
 
 
 
157a5b0
04676e1
 
8eab835
04676e1
8eab835
157a5b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04676e1
 
 
 
f89c8ce
 
04676e1
157a5b0
f89c8ce
 
 
 
 
 
 
 
 
 
 
 
3bbf04a
f89c8ce
 
 
 
3bbf04a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import streamlit as st
from moviepy.editor import VideoFileClip
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np

# Initialize Whisper model
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    # Without a model `whisper_model` is never bound, so continuing would only
    # fail later with a NameError; end this script run right after the message.
    st.stop()

# Languages offered in the UI: display name -> language code.
# Add further entries here to offer more target languages.
LANGUAGES = dict(
    English='en',
    Tamil='ta',
    Sinhala='si',
    French='fr',
)

# Page heading.
st.title("AI Video Translator with Whisper and GTTS")

# Step 1: ask the user for the video to translate.
video_file = st.file_uploader(
    label="Upload a video file",
    type=["mp4", "mov", "avi", "mkv"],
)

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when user clicks translate
    if st.button("Translate Video"):
        # Save video to a temporary file. Keep the upload's own extension
        # (mov/avi/mkv are accepted too) and fall back to .mp4 if it has none.
        video_suffix = os.path.splitext(video_file.name)[1] or '.mp4'
        with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as temp_video:
            # getvalue() returns the whole upload regardless of the current
            # read position, unlike read(), which starts wherever a previous
            # read left off.
            temp_video.write(video_file.getvalue())
            temp_video_path = temp_video.name

        # Extract audio from video.
        # Reserve the output path with NamedTemporaryFile instead of the
        # deprecated, race-prone tempfile.mktemp(); the file is created closed
        # and empty, and is overwritten by the audio writer below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            audio_path = temp_audio.name

        video = None
        extraction_error = None
        try:
            video = VideoFileClip(temp_video_path)
            if video.audio is None:
                # Give a readable message instead of an AttributeError on None.
                raise ValueError("the video has no audio track")
            video.audio.write_audiofile(audio_path)
        except Exception as e:
            extraction_error = e
        finally:
            # Always release the clip's file handles, on success and on failure.
            if video is not None:
                video.close()

        if extraction_error is not None:
            st.error(f"Error extracting audio from video: {extraction_error}")
            # Remove both temporary files before halting this script run.
            for stale_path in (temp_video_path, audio_path):
                if os.path.exists(stale_path):
                    os.remove(stale_path)
            st.stop()

        # Function to transcribe audio in chunks
        def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
            """Transcribe an audio file piece by piece and return the joined text.

            Args:
                audio_path: Path of the audio file, loaded with ``whisper.load_audio``.
                model: Object whose ``transcribe(samples)`` returns a mapping
                    with a ``'text'`` entry.
                chunk_length: Length of each piece in seconds; must be positive.

            Returns:
                The non-empty chunk transcriptions, stripped and joined by
                single spaces ('' when the audio is empty).

            Raises:
                ValueError: If ``chunk_length`` is not positive.
            """
            if chunk_length <= 0:
                raise ValueError("chunk_length must be positive")

            audio_clip = whisper.load_audio(audio_path)

            # Slice by whole sample counts. Converting to seconds and back to
            # samples can round down and drop the last sample of the audio.
            samples_per_chunk = max(1, int(chunk_length * whisper.audio.SAMPLE_RATE))

            segments = []
            for start in range(0, len(audio_clip), samples_per_chunk):
                result = model.transcribe(audio_clip[start:start + samples_per_chunk])
                # Strip each piece and skip empty ones so the join below does
                # not produce doubled or dangling spaces.
                text = result['text'].strip()
                if text:
                    segments.append(text)

            return ' '.join(segments)

        # Function to translate text in chunks
        def translate_in_chunks(text: str, translator, max_length: int = 500) -> str:
            """Translate ``text`` in whitespace-delimited chunks of bounded size.

            Words are packed greedily into chunks of at most ``max_length``
            characters, each chunk is passed to ``translator.translate``, and
            the results are joined with single spaces.

            Args:
                text: Text to translate; it is split on whitespace.
                translator: Object exposing ``translate(chunk) -> str``.
                max_length: Maximum number of characters per chunk (>= 1).

            Returns:
                The translated chunks joined by spaces; '' when ``text``
                contains no words (the translator is then never called).

            Raises:
                ValueError: If ``max_length`` is smaller than 1.
            """
            if max_length < 1:
                raise ValueError("max_length must be at least 1")

            chunks = []
            current_chunk = ""

            for word in text.split():
                # A word longer than the limit cannot fit in any chunk, so cut
                # it into max_length-sized pieces rather than exceed the limit.
                pieces = [word[i:i + max_length] for i in range(0, len(word), max_length)]
                for piece in pieces:
                    if not current_chunk:
                        # No separator is needed for the first piece of a chunk,
                        # and an empty chunk must never be flushed.
                        current_chunk = piece
                    elif len(current_chunk) + 1 + len(piece) <= max_length:
                        current_chunk = f"{current_chunk} {piece}"
                    else:
                        chunks.append(current_chunk)
                        current_chunk = piece

            if current_chunk:
                chunks.append(current_chunk)

            return ' '.join(translator.translate(chunk) for chunk in chunks)

        # Transcribe, translate and synthesize speech. The temporary files are
        # removed in `finally`, so they are cleaned up on success and on failure.
        audio_output_path = None
        try:
            original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
            st.write("Original Transcription:", original_text)

            if not original_text.strip():
                # Nothing to translate or speak; say so instead of surfacing a
                # library error caused by empty input.
                st.warning("No speech was detected in the video, so there is nothing to translate.")
            else:
                # Translate text to the target language
                translator = Translator(to_lang=LANGUAGES[target_language])
                translated_text = translate_in_chunks(original_text, translator)
                st.write(f"Translated Text ({target_language}):", translated_text)

                # Convert translated text to speech. Reserve the output path with
                # NamedTemporaryFile rather than the deprecated tempfile.mktemp().
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_speech:
                    audio_output_path = temp_speech.name
                tts = gTTS(text=translated_text, lang=LANGUAGES[target_language])
                tts.save(audio_output_path)

                # Display translated text and audio. Pass the bytes, not the
                # path, because the file is deleted right after this block.
                st.success("Translation successful!")
                with open(audio_output_path, "rb") as speech_file:
                    st.audio(speech_file.read(), format="audio/mp3")
        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")
        finally:
            # Clean up temporary files
            for temp_path in (temp_video_path, audio_path, audio_output_path):
                if temp_path and os.path.exists(temp_path):
                    try:
                        os.remove(temp_path)
                    except OSError:
                        # Best-effort cleanup: a file that cannot be removed
                        # (e.g. still held open) must not hide the result above.
                        pass