import contextlib
import os
import tempfile

import moviepy.editor as mp
import numpy as np
import pyttsx3
import soundfile as sf
import speech_recognition as sr
import streamlit as st
import torch
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from scipy.io import wavfile
from TTS.api import TTS


class EnhancedVideoTranslator:
    """Audio pipeline used to dub a video into another language.

    Each stage is a separate method: extract the audio track (moviepy),
    filter it (pydub), transcribe it (SpeechRecognition), translate the
    transcript (deep_translator), synthesise speech (Coqui TTS with a
    pyttsx3 fallback) and apply a final gain pass (soundfile / numpy).
    Stages exchange data as paths to WAV files; the caller owns those files
    and is responsible for deleting them.
    """

    def __init__(self):
        """Load the Coqui TTS model and initialise the pyttsx3 engine."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        try:
            self.tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
        except Exception:
            # `except Exception` rather than a bare `except` so that
            # KeyboardInterrupt / SystemExit are not swallowed while a
            # model is loading. Any ordinary failure falls back to bark.
            self.tts = TTS(model_name="tts_models/multilingual/multi-dataset/bark").to(self.device)

        self.pyttsx3_engine = pyttsx3.init()

    @staticmethod
    def _new_temp_wav():
        """Create an empty temporary ``.wav`` file and return its path.

        The handle is closed immediately so that other libraries can open
        the path for writing (an open handle blocks that on Windows) and so
        no file descriptor is leaked.
        """
        handle = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
        handle.close()
        return handle.name

    @staticmethod
    def _derived_path(audio_path, tag):
        """Return ``<audio_path without extension>_<tag>.wav``.

        Only the final extension is touched; a plain
        ``str.replace('.wav', ...)`` would also rewrite any ``.wav`` that
        happens to appear in a directory name.
        """
        root, _ = os.path.splitext(audio_path)
        return f"{root}_{tag}.wav"

    @staticmethod
    def _discard(path):
        """Delete *path*, ignoring the case where it cannot be removed."""
        with contextlib.suppress(OSError):
            os.unlink(path)

    def extract_audio(self, video_path):
        """Write the audio track of *video_path* to a temporary WAV file.

        Returns:
            Path of the WAV file that was written.

        Raises:
            ValueError: If the video has no audio track.
        """
        video = mp.VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("The video has no audio track to translate.")

            wav_path = self._new_temp_wav()
            try:
                video.audio.write_audiofile(wav_path)
            except Exception:
                # Do not leave an empty/partial temp file behind.
                self._discard(wav_path)
                raise
            return wav_path
        finally:
            # Release the reader held by the clip.
            video.close()

    def enhance_audio(self, audio_path):
        """Filter, normalise and compress a WAV file.

        Applies an 80 Hz high-pass, a 7500 Hz low-pass, normalisation and
        dynamic range compression, then exports the result next to the
        input with an ``_enhanced`` suffix.

        Returns:
            Path of the enhanced WAV file.
        """
        audio = AudioSegment.from_wav(audio_path)

        enhanced = audio.high_pass_filter(80)
        enhanced = enhanced.low_pass_filter(7500)
        enhanced = enhanced.normalize()
        enhanced = enhanced.compress_dynamic_range()

        enhanced_path = self._derived_path(audio_path, "enhanced")
        # export() hands back the open output file; close it so the file
        # can be deleted later and the descriptor is not leaked.
        enhanced.export(enhanced_path, format="wav").close()
        return enhanced_path

    def speech_to_text(self, audio_path):
        """Transcribe a WAV file with ``Recognizer.recognize_google``.

        Returns:
            The recognised text.

        Raises:
            RuntimeError: If the audio could not be understood or the
                recognition request failed. Previously the error message
                was returned as if it were the transcript, so it ended up
                being translated and spoken.
        """
        recognizer = sr.Recognizer()

        with sr.AudioFile(audio_path) as source:
            # NOTE(review): this call reads from the same stream that
            # record() consumes next, so the start of the recording may be
            # missing from the transcript - confirm and consider removing.
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.record(source)

        try:
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError as e:
            raise RuntimeError("Speech recognition could not understand the audio.") from e
        except sr.RequestError as e:
            raise RuntimeError(f"Speech recognition request failed: {e}") from e

    def translate_text(self, text, target_lang):
        """Translate *text* into *target_lang*, auto-detecting the source language."""
        translator = GoogleTranslator(source='auto', target=target_lang)
        return translator.translate(text)

    def text_to_speech_coqui(self, text, lang):
        """Synthesise *text* with the Coqui model loaded in ``__init__``.

        If anything goes wrong the error is printed and the request is
        handed to :meth:`text_to_speech_pyttsx3`.

        Returns:
            Path of the generated WAV file, or whatever the pyttsx3
            fallback returns (``None`` if that fails as well).
        """
        wav_path = None
        try:
            wav_path = self._new_temp_wav()

            # NOTE(review): these speaker names are passed through as-is;
            # whether the loaded model knows them is not visible here.
            if lang == 'ta':
                speaker = "tamil_female"
            elif lang == 'hi':
                speaker = "hindi_female"
            else:
                speaker = None

            self.tts.tts_to_file(
                text=text,
                file_path=wav_path,
                speaker=speaker,
                language=lang
            )

            return wav_path
        except Exception as e:
            print(f"Coqui TTS failed: {e}")
            if wav_path is not None:
                # The fallback writes its own file; drop the unused one.
                self._discard(wav_path)
            return self.text_to_speech_pyttsx3(text, lang)

    def text_to_speech_pyttsx3(self, text, lang):
        """Synthesise *text* with the shared pyttsx3 engine.

        Returns:
            Path of the generated audio file, or ``None`` if synthesis
            failed (the error is printed).
        """
        wav_path = None
        try:
            wav_path = self._new_temp_wav()

            engine = self.pyttsx3_engine

            # NOTE(review): voice identifiers depend on the installed
            # speech driver; 'tamil' / 'hindi' are used as given.
            if lang == 'ta':
                engine.setProperty('voice', 'tamil')
            elif lang == 'hi':
                engine.setProperty('voice', 'hindi')

            engine.setProperty('rate', 150)
            engine.setProperty('volume', 0.9)

            engine.save_to_file(text, wav_path)
            engine.runAndWait()

            return wav_path
        except Exception as e:
            print(f"pyttsx3 TTS failed: {e}")
            if wav_path is not None:
                self._discard(wav_path)
            return None

    def improve_audio_quality(self, audio_path):
        """Amplify a WAV file by 1.5x, clipping samples to [-1, 1].

        Returns:
            Path of the amplified file (input name with ``_improved``).

        Raises:
            ValueError: If *audio_path* is empty or ``None`` (for example
                because the preceding synthesis step failed).
        """
        if not audio_path:
            raise ValueError("No audio file to improve (speech synthesis failed).")

        # `sample_rate` instead of `sr`, which is the module alias for
        # speech_recognition in this file.
        samples, sample_rate = sf.read(audio_path)

        samples = np.clip(samples * 1.5, -1, 1)

        improved_path = self._derived_path(audio_path, "improved")
        sf.write(improved_path, samples, sample_rate)
        return improved_path


def main():
    """Render the Streamlit UI and run the translation pipeline on demand.

    The page lets the user upload a video, choose a target language and a
    TTS engine, and press "Translate Video". Pressing the button runs:
    extract audio -> (optional) enhance -> speech-to-text -> translate ->
    text-to-speech -> (optional) gain pass -> mux the new audio onto the
    original video. Any failure is reported with ``st.error``; temporary
    files and media clips are released whether or not the run succeeds.
    """
    st.title("Enhanced AI Video Translator")
    st.write("Free and Open Source Video Translation with Realistic TTS")

    LANGUAGES = {
        'English': 'en',
        'Tamil': 'ta',
        'Hindi': 'hi',
        'Telugu': 'te',
        'Malayalam': 'ml',
        'Kannada': 'kn',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Japanese': 'ja',
        'Chinese': 'zh',
        'Korean': 'ko'
    }

    st.markdown("""
    <style>
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        padding: 10px 24px;
        border-radius: 5px;
    }
    </style>
    """, unsafe_allow_html=True)

    video_file = st.file_uploader(
        "Upload your video",
        type=['mp4', 'avi', 'mov'],
        help="Supported formats: MP4, AVI, MOV"
    )

    if not video_file:
        return

    st.video(video_file)

    col1, col2 = st.columns(2)
    with col1:
        target_language = st.selectbox(
            "Target Language",
            list(LANGUAGES.keys())
        )

    with col2:
        tts_engine = st.selectbox(
            "TTS Engine",
            ["Coqui TTS", "pyttsx3"]
        )

    with st.expander("Advanced Settings"):
        quality_enhancement = st.checkbox("Enable Audio Enhancement", True)
        # NOTE(review): the two sliders below are shown to the user but
        # their values are not applied anywhere in the pipeline yet.
        speed = st.slider("Speech Speed", 0.5, 2.0, 1.0, 0.1)
        volume = st.slider("Volume", 0.0, 2.0, 1.0, 0.1)

    if not st.button("Translate Video"):
        return

    target_code = LANGUAGES[target_language]
    temp_files = []  # every file created during this run, for cleanup
    video = None
    audio = None

    try:
        progress_bar = st.progress(0)
        status = st.empty()

        # Build the translator only when a translation is requested:
        # Streamlit re-executes this function on every widget interaction,
        # and constructing it loads the TTS model each time.
        status.text("Loading speech models...")
        translator = EnhancedVideoTranslator()

        # Keep the uploaded file's extension and close the temp file before
        # anything reads it, so all bytes are flushed to disk. getvalue()
        # returns the full upload regardless of the stream position.
        suffix = os.path.splitext(video_file.name)[1] or '.mp4'
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_video:
            temp_files.append(temp_video.name)
            temp_video.write(video_file.getvalue())

        status.text("Extracting audio...")
        progress_bar.progress(20)
        audio_path = translator.extract_audio(temp_video.name)
        temp_files.append(audio_path)

        if quality_enhancement:
            audio_path = translator.enhance_audio(audio_path)
            temp_files.append(audio_path)

        status.text("Converting speech to text...")
        progress_bar.progress(40)
        original_text = translator.speech_to_text(audio_path)
        if not original_text or not original_text.strip():
            raise ValueError("No speech could be recognised in the video.")

        status.text("Translating...")
        progress_bar.progress(60)
        translated_text = translator.translate_text(
            original_text,
            target_code
        )

        status.text("Generating speech...")
        progress_bar.progress(80)

        if tts_engine == "Coqui TTS":
            translated_audio = translator.text_to_speech_coqui(
                translated_text,
                target_code
            )
        else:
            translated_audio = translator.text_to_speech_pyttsx3(
                translated_text,
                target_code
            )

        # The pyttsx3 path returns None on failure; stop with a clear
        # message instead of passing None to the next stage.
        if translated_audio is None:
            raise RuntimeError("Speech synthesis failed; no audio was produced.")
        temp_files.append(translated_audio)

        if quality_enhancement:
            translated_audio = translator.improve_audio_quality(translated_audio)
            temp_files.append(translated_audio)

        status.text("Creating final video...")
        progress_bar.progress(90)

        # A unique temp file instead of a fixed name in the working
        # directory, so concurrent sessions cannot overwrite each other.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as out_file:
            output_path = out_file.name
        temp_files.append(output_path)

        video = mp.VideoFileClip(temp_video.name)
        audio = mp.AudioFileClip(translated_audio)

        final_video = video.set_audio(audio)
        final_video.write_videofile(output_path)

        # Load the result into memory so the player does not depend on a
        # file that is deleted during cleanup.
        with open(output_path, "rb") as rendered:
            video_bytes = rendered.read()

        progress_bar.progress(100)
        status.text("Complete!")

        st.success("Translation completed!")

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Original Text")
            st.write(original_text)
        with col2:
            st.subheader("Translated Text")
            st.write(translated_text)

        st.subheader("Translated Video")
        st.video(video_bytes)

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

    finally:
        # Close clips first so their files are no longer held open, then
        # remove every temporary file, on success and on failure alike.
        for clip in (audio, video):
            if clip is not None:
                with contextlib.suppress(Exception):
                    clip.close()
        for path in temp_files:
            with contextlib.suppress(OSError):
                os.unlink(path)


# Entry point: run the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()