Artificial-superintelligence committed on
Commit
13dbd66
·
verified ·
1 Parent(s): 904c22d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +277 -0
app.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import moviepy.editor as mp
3
+ import speech_recognition as sr
4
+ from deep_translator import GoogleTranslator
5
+ import tempfile
6
+ import os
7
+ from pydub import AudioSegment
8
+ import torch
9
+ from TTS.api import TTS
10
+ import pyttsx3
11
+ import numpy as np
12
+ from scipy.io import wavfile
13
+ import soundfile as sf
14
+
15
class EnhancedVideoTranslator:
    """Helpers for dubbing a video: extract audio, transcribe, translate, synthesize.

    The constructor loads a Coqui TTS model onto the GPU when one is
    available (CPU otherwise) and also initializes a pyttsx3 engine that is
    used as a backup synthesizer.
    """

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Initialize Coqui TTS; XTTS v2 is preferred, Bark is the fallback.
        try:
            self.tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # are not swallowed while the model is loading.
            print(f"XTTS v2 failed to load, falling back to Bark: {exc}")
            self.tts = TTS(model_name="tts_models/multilingual/multi-dataset/bark").to(self.device)

        # Initialize pyttsx3 as backup
        self.pyttsx3_engine = pyttsx3.init()

    @staticmethod
    def _new_temp_wav():
        """Create an empty temporary ``.wav`` file and return its path.

        The handle is closed immediately so other libraries can reopen the
        path for writing (an open handle blocks that on Windows). The caller
        owns the file and is responsible for deleting it.
        """
        handle = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
        handle.close()
        return handle.name

    @staticmethod
    def _suffixed_path(audio_path, tag):
        """Return ``audio_path`` with ``tag`` inserted before its extension.

        Only the final extension is touched, so a ``.wav`` appearing in a
        directory name is left alone. A path without an extension gets
        ``.wav`` appended, so the result never equals the input path.
        """
        root, ext = os.path.splitext(audio_path)
        return f"{root}{tag}{ext or '.wav'}"

    def extract_audio(self, video_path):
        """Write the audio track of ``video_path`` to a temporary WAV file.

        Returns:
            Path of the WAV file (caller deletes it).

        Raises:
            ValueError: if the video has no audio track.
        """
        video = mp.VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("The uploaded video does not contain an audio track")
            audio_path = self._new_temp_wav()
            try:
                video.audio.write_audiofile(audio_path)
            except Exception:
                # Do not leave a half-written temp file behind.
                if os.path.exists(audio_path):
                    os.unlink(audio_path)
                raise
            return audio_path
        finally:
            # Release the ffmpeg reader processes held by the clip.
            video.close()

    def enhance_audio(self, audio_path):
        """Band-pass, normalize and compress a WAV file.

        Returns:
            Path of the new ``*_enhanced`` file written next to the input.
        """
        audio = AudioSegment.from_wav(audio_path)

        # Keep roughly the 80 Hz - 7.5 kHz band, then level the signal.
        enhanced = audio.high_pass_filter(80)
        enhanced = enhanced.low_pass_filter(7500)
        enhanced = enhanced.normalize()

        # Apply compression
        enhanced = enhanced.compress_dynamic_range()

        enhanced_path = self._suffixed_path(audio_path, '_enhanced')
        enhanced.export(enhanced_path, format="wav")
        return enhanced_path

    def speech_to_text(self, audio_path):
        """Transcribe a WAV file with the Google Web Speech recognizer.

        Returns:
            The recognized text.

        Raises:
            RuntimeError: if the speech is unintelligible or the recognition
                service cannot be reached. Previously the error message was
                returned as if it were the transcript, which then got
                translated and spoken.
        """
        recognizer = sr.Recognizer()

        with sr.AudioFile(audio_path) as source:
            # No adjust_for_ambient_noise() here: on a file source it
            # consumes the first second of audio, and record() does not use
            # the energy threshold it calibrates.
            audio = recognizer.record(source)

        try:
            # NOTE: recognize_google is called with its default language.
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError as exc:
            raise RuntimeError("Speech could not be understood in the audio") from exc
        except sr.RequestError as exc:
            raise RuntimeError(f"Speech recognition service failed: {exc}") from exc

    def translate_text(self, text, target_lang):
        """Translate ``text`` into ``target_lang`` with auto-detected source language."""
        translator = GoogleTranslator(source='auto', target=target_lang)
        return translator.translate(text)

    def text_to_speech_coqui(self, text, lang):
        """Synthesize ``text`` with Coqui TTS, falling back to pyttsx3 on error.

        Returns:
            Path of the generated WAV file, or whatever
            ``text_to_speech_pyttsx3`` returns (possibly ``None``) when Coqui
            fails.
        """
        output_path = None
        try:
            output_path = self._new_temp_wav()

            # Use language-specific voices if available.
            # NOTE(review): these speaker names are not checked against the
            # loaded model's speaker list - confirm they exist; otherwise this
            # call raises and the pyttsx3 fallback below is used.
            speaker = {'ta': "tamil_female", 'hi': "hindi_female"}.get(lang)

            self.tts.tts_to_file(
                text=text,
                file_path=output_path,
                speaker=speaker,
                language=lang
            )

            return output_path
        except Exception as e:
            print(f"Coqui TTS failed: {e}")
            # Drop the unused temp file before handing over to the backup engine.
            if output_path and os.path.exists(output_path):
                os.unlink(output_path)
            return self.text_to_speech_pyttsx3(text, lang)

    def text_to_speech_pyttsx3(self, text, lang):
        """Synthesize ``text`` with the pyttsx3 backup engine.

        Returns:
            Path of the generated audio file, or ``None`` if synthesis failed.
        """
        output_path = None
        try:
            output_path = self._new_temp_wav()

            # Configure pyttsx3
            engine = self.pyttsx3_engine

            # Set language properties
            # NOTE(review): voice identifiers are driver specific - confirm
            # 'tamil' / 'hindi' are valid on the deployment platform.
            if lang == 'ta':
                engine.setProperty('voice', 'tamil')
            elif lang == 'hi':
                engine.setProperty('voice', 'hindi')

            # Adjust voice properties
            engine.setProperty('rate', 150)  # Speed
            engine.setProperty('volume', 0.9)  # Volume

            # Save to file
            engine.save_to_file(text, output_path)
            engine.runAndWait()

            return output_path
        except Exception as e:
            print(f"pyttsx3 TTS failed: {e}")
            if output_path and os.path.exists(output_path):
                os.unlink(output_path)
            return None

    def improve_audio_quality(self, audio_path):
        """Boost the volume of an audio file by 1.5x and save a copy.

        Returns:
            Path of the new ``*_improved`` file written next to the input.

        Raises:
            ValueError: if ``audio_path`` is empty (e.g. synthesis failed).
        """
        if not audio_path:
            raise ValueError("No audio file to improve")

        # ``sample_rate`` instead of ``sr`` so the speech_recognition module
        # alias is not shadowed.
        samples, sample_rate = sf.read(audio_path)

        # Increase volume, then limit samples to [-1, 1] so the boosted
        # signal stays within the valid range.
        boosted = np.clip(samples * 1.5, -1, 1)

        # Save improved audio
        improved_path = self._suffixed_path(audio_path, '_improved')
        sf.write(improved_path, boosted, sample_rate)
        return improved_path
130
+
131
def main():
    """Render the Streamlit UI and run the video translation pipeline.

    The pipeline (extract audio -> transcribe -> translate -> synthesize ->
    remux) only runs when the "Translate Video" button is pressed. Every
    temporary file and media clip created along the way is released in a
    ``finally`` block, whether the run succeeds or fails.
    """
    st.title("Enhanced AI Video Translator")
    st.write("Free and Open Source Video Translation with Realistic TTS")

    LANGUAGES = {
        'English': 'en',
        'Tamil': 'ta',
        'Hindi': 'hi',
        'Telugu': 'te',
        'Malayalam': 'ml',
        'Kannada': 'kn',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Japanese': 'ja',
        'Chinese': 'zh',
        'Korean': 'ko'
    }

    # Improved UI
    st.markdown("""
        <style>
        .stButton>button {
            background-color: #4CAF50;
            color: white;
            padding: 10px 24px;
            border-radius: 5px;
        }
        </style>
        """, unsafe_allow_html=True)

    video_file = st.file_uploader(
        "Upload your video",
        type=['mp4', 'avi', 'mov'],
        help="Supported formats: MP4, AVI, MOV"
    )

    if not video_file:
        return

    st.video(video_file)

    col1, col2 = st.columns(2)
    with col1:
        target_language = st.selectbox(
            "Target Language",
            list(LANGUAGES.keys())
        )

    with col2:
        tts_engine = st.selectbox(
            "TTS Engine",
            ["Coqui TTS", "pyttsx3"]
        )

    # Advanced options
    with st.expander("Advanced Settings"):
        quality_enhancement = st.checkbox("Enable Audio Enhancement", True)
        # NOTE(review): the two sliders below are shown but their values are
        # not yet applied anywhere in the pipeline.
        speed = st.slider("Speech Speed", 0.5, 2.0, 1.0, 0.1)
        volume = st.slider("Volume", 0.0, 2.0, 1.0, 0.1)

    if not st.button("Translate Video"):
        return

    temp_files = []  # every path created below, removed in ``finally``
    clips = []  # moviepy clips that hold ffmpeg readers open
    try:
        progress_bar = st.progress(0)
        status = st.empty()

        # Build the translator only when a translation is requested, so the
        # TTS model is not reloaded on every widget interaction.
        translator = EnhancedVideoTranslator()
        target_code = LANGUAGES[target_language]

        # Persist the upload. The file is closed before ffmpeg reads it so
        # all bytes are on disk, and it keeps the upload's own extension.
        suffix = os.path.splitext(video_file.name)[1] or '.mp4'
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_video:
            temp_video.write(video_file.getvalue())
        video_path = temp_video.name
        temp_files.append(video_path)

        # Extract audio
        status.text("Extracting audio...")
        progress_bar.progress(20)
        audio_path = translator.extract_audio(video_path)
        temp_files.append(audio_path)

        if quality_enhancement:
            audio_path = translator.enhance_audio(audio_path)
            temp_files.append(audio_path)

        # Speech to text
        status.text("Converting speech to text...")
        progress_bar.progress(40)
        original_text = translator.speech_to_text(audio_path)
        if not original_text or not original_text.strip():
            raise ValueError("No speech was recognized in the video")

        # Translate
        status.text("Translating...")
        progress_bar.progress(60)
        translated_text = translator.translate_text(original_text, target_code)
        if not translated_text:
            raise ValueError("Translation returned no text")

        # Text to speech
        status.text("Generating speech...")
        progress_bar.progress(80)

        if tts_engine == "Coqui TTS":
            translated_audio = translator.text_to_speech_coqui(translated_text, target_code)
        else:
            translated_audio = translator.text_to_speech_pyttsx3(translated_text, target_code)

        # Both TTS helpers may return None when synthesis fails.
        if not translated_audio:
            raise RuntimeError("Speech synthesis failed for the selected engine")
        temp_files.append(translated_audio)

        if quality_enhancement:
            translated_audio = translator.improve_audio_quality(translated_audio)
            temp_files.append(translated_audio)

        # Create final video
        status.text("Creating final video...")
        progress_bar.progress(90)

        # A unique output path keeps concurrent sessions from overwriting
        # each other's result.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as out_file:
            output_path = out_file.name
        temp_files.append(output_path)

        video = mp.VideoFileClip(video_path)
        clips.append(video)
        audio = mp.AudioFileClip(translated_audio)
        clips.append(audio)

        final_video = video.set_audio(audio)
        final_video.write_videofile(output_path)

        progress_bar.progress(100)
        status.text("Complete!")

        # Display results
        st.success("Translation completed!")

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Original Text")
            st.write(original_text)
        with col2:
            st.subheader("Translated Text")
            st.write(translated_text)

        st.subheader("Translated Video")
        # Hand Streamlit the bytes so the file can safely be deleted below.
        with open(output_path, 'rb') as result_file:
            st.video(result_file.read())

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
    finally:
        # Cleanup: close clips first, since open readers can block deletion.
        for clip in clips:
            try:
                clip.close()
            except Exception as close_err:
                print(f"Failed to close clip: {close_err}")
        for path in temp_files:
            try:
                if path and os.path.exists(path):
                    os.unlink(path)
            except OSError as cleanup_err:
                print(f"Failed to remove {path}: {cleanup_err}")


if __name__ == "__main__":
    main()