"""Streamlit app that dubs an uploaded video into Tamil.

Pipeline: extract the audio track, transcribe it with Whisper, translate each
segment to Tamil, synthesize Tamil speech with gTTS, then render a new video
whose soundtrack is the synthesized speech (optionally with burned-in Tamil
subtitles).
"""

import json
import os
import tempfile
from datetime import timedelta

import azure.cognitiveservices.speech as speechsdk
import ffmpeg
import imageio
import numpy as np
import streamlit as st
import whisper
from gtts import gTTS
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    CompositeVideoClip,
    TextClip,
    VideoFileClip,
)
from PIL import Image
from translate import Translator


def _ensure_imageio_ffmpeg():
    """Best-effort call to the legacy imageio ffmpeg downloader.

    Newer imageio releases either raise ``RuntimeError`` from this call
    (the downloader was deprecated in favor of the ``imageio-ffmpeg``
    package) or no longer expose the attribute at all.  Neither case should
    stop the app from starting, so both are ignored; any other failure
    propagates as before.
    """
    try:
        imageio.plugins.ffmpeg.download()
    except (AttributeError, RuntimeError):
        # Deprecated/removed API: the ffmpeg binary is expected to come from
        # the imageio-ffmpeg package instead.
        pass


# Configure MoviePy to use imageio for reading images
_ensure_imageio_ffmpeg()


# Configure ImageMagick policy to allow PDF and text file handling
def configure_imagemagick():
    """Configure ImageMagick policy to allow text operations.

    Rewrites the ``@*`` pattern rule in ``/etc/ImageMagick-6/policy.xml`` from
    ``rights="none"`` to ``rights="read|write"``.  Does nothing when the file
    does not exist, and only writes when the content actually changes (this
    function runs on every Streamlit rerun).  Failures are reported in the UI
    rather than raised.
    """
    policy_file = "/etc/ImageMagick-6/policy.xml"
    if not os.path.exists(policy_file):
        return

    try:
        with open(policy_file, 'r', encoding='utf-8') as f:
            policy_content = f.read()

        # Modify policy to allow text file handling
        updated_content = policy_content.replace(
            'rights="none" pattern="@*"',
            'rights="read|write" pattern="@*"',
        )
        if updated_content != policy_content:
            with open(policy_file, 'w', encoding='utf-8') as f:
                f.write(updated_content)
    except (OSError, UnicodeError) as e:
        st.warning(f"Unable to configure ImageMagick policy: {e}")
        st.info("You may need to run this application with sudo privileges to modify ImageMagick policy")


# Tamil-specific voice configurations
TAMIL_VOICES = {
    'Female 1': {'name': 'ta-IN-PallaviNeural', 'style': 'normal'},
    'Female 2': {'name': 'ta-IN-PallaviNeural', 'style': 'formal'},
    'Male 1': {'name': 'ta-IN-ValluvarNeural', 'style': 'normal'},
    'Male 2': {'name': 'ta-IN-ValluvarNeural', 'style': 'formal'},
}


class TamilTextProcessor:
    """Text clean-up helpers applied to Tamil text before speech synthesis."""

    # Tamil digit characters mapped to their ASCII equivalents.
    _TAMIL_DIGITS = str.maketrans('௦௧௨௩௪௫௬௭௮௯', '0123456789')

    @staticmethod
    def normalize_tamil_text(text):
        """Normalize Tamil text for better pronunciation.

        Replaces Tamil numeral characters with ASCII digits and returns the
        resulting string.
        """
        return text.translate(TamilTextProcessor._TAMIL_DIGITS)

    @staticmethod
    def process_for_tts(text):
        """Process Tamil text for TTS.

        Drops every character whose code point is 65535 or above and
        collapses all whitespace runs into single spaces (also trimming the
        ends).
        """
        text = ''.join(char for char in text if ord(char) < 65535)
        return ' '.join(text.split())


class TamilDubber:
    """Transcribe, translate and synthesize speech for a video.

    Use as a context manager: every temporary file created through
    :meth:`create_temp_file` is deleted on exit.
    """

    def __init__(self):
        """Load the Whisper ``base`` model; report and re-raise on failure."""
        try:
            self.whisper_model = whisper.load_model("base")
        except Exception as e:
            st.error(f"Error loading Whisper model: {e}")
            raise
        self.temp_files = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def cleanup(self):
        """Delete every temporary file created by this instance (best effort)."""
        for temp_file in self.temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass
            except OSError:
                # Best effort: a file still held open elsewhere must not mask
                # the original error or abort removal of the remaining files.
                pass
        self.temp_files = []

    def create_temp_file(self, suffix):
        """Create an empty temporary file and return its path.

        The file is created atomically with ``mkstemp`` (``mktemp`` only
        returns a name and is open to races), the descriptor is closed right
        away so other libraries can reopen the path, and the path is
        registered for deletion in :meth:`cleanup`.
        """
        fd, temp_file = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        self.temp_files.append(temp_file)
        return temp_file

    def extract_audio(self, video_path):
        """Extract audio and transcribe using Whisper.

        Returns a ``(segments, duration)`` tuple: the ``"segments"`` entry of
        the Whisper transcription result and the video duration.

        Raises:
            ValueError: if the video has no audio track.
            Exception: any extraction/transcription error, after it has been
                shown in the UI.
        """
        try:
            video = VideoFileClip(video_path)
            try:
                if video.audio is None:
                    raise ValueError("The video has no audio track to transcribe")
                audio_path = self.create_temp_file(".wav")
                video.audio.write_audiofile(audio_path)
                duration = video.duration
            finally:
                # Release the ffmpeg readers so the temp file can be removed.
                video.close()

            result = self.whisper_model.transcribe(audio_path)
            return result["segments"], duration
        except Exception as e:
            st.error(f"Error in audio extraction: {e}")
            raise

    def translate_segments(self, segments):
        """Translate segments to Tamil.

        Each input segment must provide ``"text"``, ``"start"`` and ``"end"``.
        Returns a list of dicts with ``"text"``, ``"start"``, ``"end"`` and
        ``"duration"``.  When a segment fails to translate, a warning is shown
        and its original text is kept.
        """
        translator = Translator(to_lang='ta')
        translated_segments = []

        for segment in segments:
            try:
                text = translator.translate(segment["text"])
                text = TamilTextProcessor.normalize_tamil_text(text)
                text = TamilTextProcessor.process_for_tts(text)
            except Exception as e:
                st.warning(f"Translation warning for segment: {str(e)}")
                text = segment["text"]

            translated_segments.append({
                "text": text,
                "start": segment["start"],
                "end": segment["end"],
                "duration": segment["end"] - segment["start"],
            })

        return translated_segments

    def generate_audio(self, text, voice_style="normal"):
        """Generate Tamil audio using gTTS.

        Returns the path of a temporary MP3 file containing the speech.
        ``voice_style`` is accepted for interface compatibility but is not
        used by this gTTS-based implementation.
        """
        try:
            temp_path = self.create_temp_file(".mp3")
            tts = gTTS(text=text, lang='ta', slow=False)
            tts.save(temp_path)
            return temp_path
        except Exception as e:
            st.error(f"Error in audio generation: {e}")
            raise

    @staticmethod
    def _format_srt_timestamp(seconds):
        """Format a time in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3600 * 1000)
        minutes, remainder = divmod(remainder, 60 * 1000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def create_subtitles(self, segments, output_path):
        """Generate SRT subtitles.

        Writes one numbered cue per segment to ``output_path`` (UTF-8), with
        timestamps in ``HH:MM:SS,mmm`` form.
        """
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                for idx, segment in enumerate(segments, 1):
                    start_time = self._format_srt_timestamp(segment["start"])
                    end_time = self._format_srt_timestamp(segment["end"])
                    f.write(f"{idx}\n")
                    f.write(f"{start_time} --> {end_time}\n")
                    f.write(f"{segment['text']}\n\n")
        except Exception as e:
            st.error(f"Error creating subtitles: {e}")
            raise

    def create_subtitle_clip(self, txt, size, color, width=720):
        """Create subtitle clip with proper configuration.

        Args:
            txt: subtitle text.
            size: font size.
            color: text color.
            width: width in pixels of the caption box; the text wraps inside
                it and the height adjusts automatically.
        """
        try:
            return TextClip(
                txt=txt,
                font='DejaVu-Sans',  # Use a system font that supports Tamil
                fontsize=size,
                color=color,
                stroke_color='black',
                stroke_width=1,
                method='caption',  # Use caption method instead of label
                size=(width, None),  # Set width, let height adjust automatically
            )
        except Exception as e:
            st.error(f"Error creating subtitle clip: {e}")
            raise


def _build_audio_clips(dubber, segments, video_duration, progress_bar, opened_clips):
    """Synthesize speech for each segment and place it on the timeline.

    Every ``AudioFileClip`` that gets opened is appended to ``opened_clips``
    so the caller can close it even if a later segment fails.  Returns the
    list of positioned clips to mix into the soundtrack.
    """
    positioned = []
    total = len(segments)

    for idx, segment in enumerate(segments):
        remaining = video_duration - segment["start"]
        # gTTS rejects empty text, so blank segments are skipped.
        if segment["text"].strip() and remaining > 0:
            audio_path = dubber.generate_audio(segment["text"])
            clip = AudioFileClip(audio_path)
            opened_clips.append(clip)
            if clip.duration > remaining:
                # Do not let speech run past the end of the video.
                clip = clip.subclip(0, remaining)
            positioned.append(clip.set_start(segment["start"]))
        progress_bar.progress(0.50 + (0.25 * (idx + 1) / total))

    return positioned


def _build_subtitle_clips(dubber, segments, subtitle_size, subtitle_color, width):
    """Create one positioned subtitle clip per segment, skipping failures."""
    subtitle_clips = []
    for segment in segments:
        try:
            clip = dubber.create_subtitle_clip(
                segment["text"],
                subtitle_size,
                subtitle_color,
                width=width,
            )
            clip = clip.set_position(('center', 'bottom'))
            clip = clip.set_start(segment["start"])
            clip = clip.set_duration(segment["duration"])
            subtitle_clips.append(clip)
        except Exception as e:
            st.warning(f"Skipping subtitle for segment due to error: {e}")
    return subtitle_clips


def _run_dubbing(video_file, generate_subtitles, subtitle_size, subtitle_color):
    """Run the full dubbing pipeline and return the rendered video as bytes."""
    with TamilDubber() as dubber:
        # Save uploaded video, keeping its extension so ffmpeg sees the
        # container type the user actually uploaded.
        suffix = os.path.splitext(video_file.name)[1].lower() or ".mp4"
        temp_video_path = dubber.create_temp_file(suffix)
        with open(temp_video_path, "wb") as f:
            f.write(video_file.read())

        # Progress tracking
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Extract audio and transcribe
        status_text.text("Extracting audio and transcribing...")
        segments, video_duration = dubber.extract_audio(temp_video_path)
        progress_bar.progress(0.25)

        # Translate segments
        status_text.text("Translating to Tamil...")
        translated_segments = dubber.translate_segments(segments)
        progress_bar.progress(0.50)

        # Generate Tamil audio
        status_text.text("Generating Tamil audio...")
        video = VideoFileClip(temp_video_path)
        opened_audio = []
        try:
            audio_clips = _build_audio_clips(
                dubber, translated_segments, video.duration, progress_bar, opened_audio
            )

            # Create final video
            status_text.text("Creating final video...")
            output_path = dubber.create_temp_file(".mp4")

            if audio_clips:
                # Replace the original soundtrack with the Tamil speech.
                dubbed_audio = CompositeAudioClip(audio_clips)
                dubbed_video = video.set_audio(dubbed_audio)
            else:
                st.warning("No speech was generated; keeping the original audio.")
                dubbed_video = video

            # Add subtitles if enabled
            if generate_subtitles:
                subtitle_clips = _build_subtitle_clips(
                    dubber,
                    translated_segments,
                    subtitle_size,
                    subtitle_color,
                    width=max(1, int(video.w * 0.9)),
                )
                final_video = CompositeVideoClip([dubbed_video] + subtitle_clips)
            else:
                final_video = dubbed_video

            # Write final video with proper codec settings
            final_video.write_videofile(
                output_path,
                codec='libx264',
                audio_codec='aac',
                fps=video.fps,
                threads=4,
                preset='medium',
            )
        finally:
            # Release ffmpeg readers so the temp files can be deleted.
            for clip in opened_audio:
                clip.close()
            video.close()

        progress_bar.progress(1.0)

        # Read the result before the temp files are removed on context exit.
        with open(output_path, "rb") as f:
            return f.read()


def main():
    """Render the Streamlit UI and run the dubbing pipeline on request."""
    # Configure ImageMagick at startup
    configure_imagemagick()

    st.title("Tamil Movie Dubbing System")
    st.sidebar.header("டப்பிங் அமைப்புகள்")  # Dubbing Settings in Tamil

    # File uploader
    video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])
    if not video_file:
        return

    # Settings
    # NOTE: the selected voice is not applied; the gTTS backend used by
    # TamilDubber.generate_audio takes no voice option.
    st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))

    with st.expander("Advanced Settings"):
        generate_subtitles = st.checkbox("Generate Tamil Subtitles", value=True)
        subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
        subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")

    if not st.button("Start Tamil Dubbing"):
        return

    try:
        with st.spinner("Processing video..."):
            video_bytes = _run_dubbing(
                video_file, generate_subtitles, subtitle_size, subtitle_color
            )

        # Display result
        st.success("டப்பிங் வெற்றிகரமாக முடிந்தது!")  # Dubbing completed successfully in Tamil
        st.video(video_bytes)

        # Download button
        st.download_button(
            "Download Dubbed Video",
            video_bytes,
            file_name="tamil_dubbed_video.mp4",
            mime="video/mp4",
        )
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()