"""Streamlit app that dubs an uploaded video into another language.

Pipeline: extract audio -> optional clean-up -> speech recognition ->
text translation -> text-to-speech -> mux the new audio onto the video.
"""

import os
import tempfile

import moviepy.editor as mp
import numpy as np
import pyttsx3
import soundfile as sf
import speech_recognition as sr
import streamlit as st
import torch
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from scipy.io import wavfile
from TTS.api import TTS


def _new_temp_path(suffix):
    """Reserve an empty temporary file and return its path.

    The handle is closed right away so that other libraries (ffmpeg,
    pyttsx3, ...) can open the path themselves. The caller owns the file
    and is responsible for deleting it.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def _derived_path(path, tag):
    """Return *path* with *tag* inserted before its extension.

    Only the final extension is touched, so a ``.wav`` that happens to
    appear in a directory name is left alone, and a path without an
    extension still yields a distinct file instead of overwriting the input.
    """
    root, ext = os.path.splitext(path)
    return f"{root}{tag}{ext or '.wav'}"


def _remove_files(paths):
    """Delete every existing file in *paths*; failures are reported, not raised."""
    for path in paths:
        if not path or not os.path.exists(path):
            continue
        try:
            os.unlink(path)
        except OSError as exc:
            print(f"Could not remove temporary file {path}: {exc}")


class EnhancedVideoTranslator:
    """Bundles the audio, speech-recognition, translation and TTS steps."""

    def __init__(self):
        """Load the Coqui TTS model (with a fallback) and the pyttsx3 engine."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Initialize Coqui TTS
        try:
            self.tts = TTS(
                model_name="tts_models/multilingual/multi-dataset/xtts_v2"
            ).to(self.device)
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # still propagate, and report why the primary model was skipped.
            print(f"XTTS v2 failed to load, falling back to Bark: {exc}")
            self.tts = TTS(
                model_name="tts_models/multilingual/multi-dataset/bark"
            ).to(self.device)

        # Initialize pyttsx3 as backup
        self.pyttsx3_engine = pyttsx3.init()

    def extract_audio(self, video_path):
        """Write the audio track of *video_path* to a temporary WAV file.

        Returns:
            Path of the WAV file; the caller must delete it.

        Raises:
            ValueError: if the video has no audio track.
        """
        video = mp.VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track")
            audio_path = _new_temp_path(".wav")
            try:
                video.audio.write_audiofile(audio_path)
            except Exception:
                _remove_files([audio_path])
                raise
            return audio_path
        finally:
            # Release the ffmpeg readers held by the clip.
            video.close()

    def enhance_audio(self, audio_path):
        """Band-pass, normalize and compress the WAV at *audio_path*.

        Returns:
            Path of a new ``*_enhanced.wav`` file next to the input.
        """
        audio = AudioSegment.from_wav(audio_path)

        # Noise reduction and enhancement
        enhanced = audio.high_pass_filter(80)
        enhanced = enhanced.low_pass_filter(7500)
        enhanced = enhanced.normalize()

        # Apply compression
        enhanced = enhanced.compress_dynamic_range()

        enhanced_path = _derived_path(audio_path, "_enhanced")
        enhanced.export(enhanced_path, format="wav")
        return enhanced_path

    def speech_to_text(self, audio_path):
        """Transcribe the WAV at *audio_path* with Google speech recognition.

        Raises:
            RuntimeError: if no speech could be recognized or the service
                request failed. Failures are raised instead of being returned
                as text, so an error message is never translated and spoken.
        """
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            # NOTE(review): calibration reads from the start of the file, so
            # that opening stretch is presumably not part of the recording
            # below -- confirm this is acceptable.
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.record(source)

        try:
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError as exc:
            raise RuntimeError(
                "Could not recognize any speech in the audio"
            ) from exc
        except sr.RequestError as exc:
            raise RuntimeError(
                f"Speech recognition request failed: {exc}"
            ) from exc

    def translate_text(self, text, target_lang):
        """Translate *text* into *target_lang*, auto-detecting the source."""
        translator = GoogleTranslator(source='auto', target=target_lang)
        return translator.translate(text)

    def text_to_speech_coqui(self, text, lang):
        """Synthesize *text* with Coqui TTS, falling back to pyttsx3.

        Returns:
            Path of the generated WAV, or ``None`` if the pyttsx3 fallback
            also fails.
        """
        # Use language-specific voices if available
        speaker = {'ta': "tamil_female", 'hi': "hindi_female"}.get(lang)

        output_path = _new_temp_path(".wav")
        try:
            self.tts.tts_to_file(
                text=text,
                file_path=output_path,
                speaker=speaker,
                # Translator codes may be mixed-case (e.g. 'zh-CN');
                # presumably the TTS model expects lower-case codes.
                language=lang.lower(),
            )
            return output_path
        except Exception as e:
            print(f"Coqui TTS failed: {e}")
            _remove_files([output_path])
            return self.text_to_speech_pyttsx3(text, lang)

    def text_to_speech_pyttsx3(self, text, lang):
        """Synthesize *text* with the offline pyttsx3 engine.

        Returns:
            Path of the generated WAV, or ``None`` on failure.
        """
        output_path = _new_temp_path(".wav")
        try:
            # Configure pyttsx3
            engine = self.pyttsx3_engine

            # Set language properties
            if lang == 'ta':
                engine.setProperty('voice', 'tamil')
            elif lang == 'hi':
                engine.setProperty('voice', 'hindi')

            # Adjust voice properties
            engine.setProperty('rate', 150)  # Speed
            engine.setProperty('volume', 0.9)  # Volume

            # Save to file
            engine.save_to_file(text, output_path)
            engine.runAndWait()
            return output_path
        except Exception as e:
            print(f"pyttsx3 TTS failed: {e}")
            _remove_files([output_path])
            return None

    def improve_audio_quality(self, audio_path):
        """Boost the volume of the WAV at *audio_path* by 1.5x.

        Returns:
            Path of a new ``*_improved.wav`` file next to the input.
        """
        # ``sample_rate`` rather than ``sr`` so the speech_recognition
        # module alias is not shadowed.
        samples, sample_rate = sf.read(audio_path)

        samples = samples * 1.5  # Increase volume
        samples = np.clip(samples, -1, 1)  # Keep boosted samples in [-1, 1]

        improved_path = _derived_path(audio_path, "_improved")
        sf.write(improved_path, samples, sample_rate)
        return improved_path


def main():
    """Render the Streamlit UI and run the translation pipeline on demand."""
    st.title("Enhanced AI Video Translator")
    st.write("Free and Open Source Video Translation with Realistic TTS")

    LANGUAGES = {
        'English': 'en',
        'Tamil': 'ta',
        'Hindi': 'hi',
        'Telugu': 'te',
        'Malayalam': 'ml',
        'Kannada': 'kn',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Japanese': 'ja',
        # GoogleTranslator has no plain 'zh' code; Simplified Chinese is
        # 'zh-CN'.
        'Chinese': 'zh-CN',
        'Korean': 'ko',
    }

    translator = EnhancedVideoTranslator()

    # Improved UI
    st.markdown(""" """, unsafe_allow_html=True)

    video_file = st.file_uploader(
        "Upload your video",
        type=['mp4', 'avi', 'mov'],
        help="Supported formats: MP4, AVI, MOV",
    )

    if not video_file:
        return

    st.video(video_file)

    col1, col2 = st.columns(2)
    with col1:
        target_language = st.selectbox(
            "Target Language",
            list(LANGUAGES.keys()),
        )
    with col2:
        tts_engine = st.selectbox(
            "TTS Engine",
            ["Coqui TTS", "pyttsx3"],
        )

    # Advanced options
    with st.expander("Advanced Settings"):
        quality_enhancement = st.checkbox("Enable Audio Enhancement", True)
        # NOTE: speed and volume are shown in the UI but are not yet passed
        # to either TTS engine.
        speed = st.slider("Speech Speed", 0.5, 2.0, 1.0, 0.1)
        volume = st.slider("Volume", 0.0, 2.0, 1.0, 0.1)

    if not st.button("Translate Video"):
        return

    temp_files = []  # every file created below, removed in ``finally``
    video = None
    audio = None
    try:
        progress_bar = st.progress(0)
        status = st.empty()
        target_code = LANGUAGES[target_language]

        # Process video. The ``with`` block flushes and closes the file so
        # ffmpeg sees the complete upload; getvalue() does not depend on the
        # buffer's current read position.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
            temp_files.append(temp_video.name)
            temp_video.write(video_file.getvalue())
        video_path = temp_video.name

        # Extract audio
        status.text("Extracting audio...")
        progress_bar.progress(20)
        audio_path = translator.extract_audio(video_path)
        temp_files.append(audio_path)

        if quality_enhancement:
            audio_path = translator.enhance_audio(audio_path)
            temp_files.append(audio_path)

        # Speech to text
        status.text("Converting speech to text...")
        progress_bar.progress(40)
        original_text = translator.speech_to_text(audio_path)

        # Translate
        status.text("Translating...")
        progress_bar.progress(60)
        translated_text = translator.translate_text(original_text, target_code)

        # Text to speech
        status.text("Generating speech...")
        progress_bar.progress(80)
        if tts_engine == "Coqui TTS":
            translated_audio = translator.text_to_speech_coqui(
                translated_text, target_code
            )
        else:
            translated_audio = translator.text_to_speech_pyttsx3(
                translated_text, target_code
            )

        # Both TTS helpers signal total failure with None; stop here with a
        # clear message instead of crashing further down the pipeline.
        if translated_audio is None:
            raise RuntimeError("Text-to-speech failed; no audio was generated")
        temp_files.append(translated_audio)

        if quality_enhancement:
            translated_audio = translator.improve_audio_quality(translated_audio)
            temp_files.append(translated_audio)

        # Create final video
        status.text("Creating final video...")
        progress_bar.progress(90)

        # A unique temp path avoids collisions between concurrent sessions.
        output_path = _new_temp_path(".mp4")
        temp_files.append(output_path)
        video = mp.VideoFileClip(video_path)
        audio = mp.AudioFileClip(translated_audio)
        final_video = video.set_audio(audio)
        final_video.write_videofile(output_path)

        progress_bar.progress(100)
        status.text("Complete!")

        # Display results
        st.success("Translation completed!")

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Original Text")
            st.write(original_text)
        with col2:
            st.subheader("Translated Text")
            st.write(translated_text)

        st.subheader("Translated Video")
        # Hand Streamlit the bytes so the file can be deleted safely below.
        with open(output_path, "rb") as rendered:
            st.video(rendered.read())

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
    finally:
        # Close clips first: open ffmpeg readers can block file deletion.
        for clip in (audio, video):
            if clip is not None:
                clip.close()
        # Cleanup runs on success and on failure alike.
        _remove_files(temp_files)


if __name__ == "__main__":
    main()