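"""AI Video Translator (Streamlit app).

Pipeline: upload a video, extract its audio track with moviepy, transcribe
it with OpenAI Whisper, translate the transcript with the `translate`
package, and synthesize speech in the target language with gTTS.

Assumed environment, inferred from the imports below rather than stated in
the source: the `streamlit`, `openai-whisper`, `moviepy` (1.x `editor` API),
`translate`, `gTTS`, and `numpy` packages, plus an `ffmpeg` binary on PATH,
which both moviepy and Whisper rely on. Launch with `streamlit run app.py`
(the filename is a placeholder).
"""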
import streamlit as st
from moviepy.editor import VideoFileClip
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
# Initialize the Whisper speech-recognition model once at startup
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    st.stop()  # without a model the rest of the app cannot run
# Language options: ISO 639-1 codes; any code added here must be accepted
# by both the translation backend and gTTS
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',  # Add more languages as needed
}
st.title("AI Video Translator with Whisper and GTTS")
# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])
if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when the user clicks translate
    if st.button("Translate Video"):
        # Save the upload to a temporary file so moviepy can read it from disk
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
            temp_video.write(video_file.read())
            temp_video_path = temp_video.name

        # Extract the audio track from the video
        try:
            video = VideoFileClip(temp_video_path)
            audio_path = tempfile.mktemp(suffix=".wav")  # mktemp is race-prone but adequate for a single-user demo
            video.audio.write_audiofile(audio_path)
            video.close()  # release the file handle so cleanup can delete the video
        except Exception as e:
            st.error(f"Error extracting audio from video: {e}")
            os.remove(temp_video_path)
            st.stop()
        # Function to transcribe audio in fixed-length chunks, bounding memory use
        def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
            audio_clip = whisper.load_audio(audio_path)  # float32 waveform resampled to 16 kHz
            audio_duration = len(audio_clip) / whisper.audio.SAMPLE_RATE  # duration in seconds
            segments = []
            for start in np.arange(0, audio_duration, chunk_length):
                end = min(start + chunk_length, audio_duration)
                segment = audio_clip[int(start * whisper.audio.SAMPLE_RATE):int(end * whisper.audio.SAMPLE_RATE)]
                result = model.transcribe(segment)
                segments.append(result['text'])
            return ' '.join(segments)
        # Function to translate text in chunks; the `translate` package's default
        # MyMemory backend rejects queries longer than ~500 characters
        def translate_in_chunks(text, translator, max_length=500):
            words = text.split()
            chunks = []
            current_chunk = ""
            for word in words:
                if len(current_chunk) + len(word) + 1 <= max_length:
                    current_chunk += " " + word if current_chunk else word
                else:
                    chunks.append(current_chunk)
                    current_chunk = word
            if current_chunk:
                chunks.append(current_chunk)
            translated_chunks = [translator.translate(chunk) for chunk in chunks]
            return ' '.join(translated_chunks)
        # Transcribe with Whisper, translate, and synthesize speech
        audio_output_path = None  # defined up front so cleanup below is safe if any step fails
        try:
            original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
            st.write("Original Transcription:", original_text)

            # Translate the transcription into the target language
            translator = Translator(to_lang=LANGUAGES[target_language])
            translated_text = translate_in_chunks(original_text, translator)
            st.write(f"Translated Text ({target_language}):", translated_text)

            # Convert the translated text to speech
            tts = gTTS(text=translated_text, lang=LANGUAGES[target_language])
            audio_output_path = tempfile.mktemp(suffix=".mp3")
            tts.save(audio_output_path)

            # Display the result and play the synthesized audio
            st.success("Translation successful!")
            st.audio(audio_output_path, format="audio/mp3")
        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")

        # Clean up temporary files
        os.remove(temp_video_path)
        os.remove(audio_path)
        if audio_output_path and os.path.exists(audio_output_path):  # only remove if it was actually written
            os.remove(audio_output_path)