Update app.py
Browse files
app.py
CHANGED
@@ -189,56 +189,6 @@ def translate_text(transcription_json, source_language, target_language):
|
|
189 |
# Return the translated timestamps as a JSON string
|
190 |
return translated_json
|
191 |
|
192 |
-
def add_transcript_to_video(video_path, translated_json, output_path):
|
193 |
-
# Load the video file
|
194 |
-
video = VideoFileClip(video_path)
|
195 |
-
|
196 |
-
# Create text clips based on timestamps
|
197 |
-
text_clips = []
|
198 |
-
|
199 |
-
logger.debug("Full translated_json: %s", translated_json)
|
200 |
-
|
201 |
-
# Define relative font size based on video height (adjust this value as necessary)
|
202 |
-
# Define relative font size based on video height (adjust this value as necessary)
|
203 |
-
subtitle_font_size = int(video.h // 15) # Ensure it's an integer
|
204 |
-
|
205 |
-
# Set maximum width for subtitle wrapping (80% of video width)
|
206 |
-
max_subtitle_width = int(video.w * 0.8) # Ensure it's an integer
|
207 |
-
|
208 |
-
font_path = "./NotoSansSC-Regular.ttf"
|
209 |
-
|
210 |
-
for entry in translated_json:
|
211 |
-
logger.debug("Processing entry: %s", entry)
|
212 |
-
|
213 |
-
# Ensure `entry` is a dictionary with keys "start", "end", and "translated"
|
214 |
-
if isinstance(entry, dict) and "translated" in entry:
|
215 |
-
txt_clip = TextClip(
|
216 |
-
text=entry["translated"],
|
217 |
-
font=font_path,
|
218 |
-
method='caption',
|
219 |
-
color='yellow',
|
220 |
-
font_size=subtitle_font_size, # Use relative font size
|
221 |
-
size=(max_subtitle_width, None) # Restrict the width to ensure wrapping
|
222 |
-
).with_start(entry["start"]).with_duration(entry["end"] - entry["start"]).with_position(('bottom')).with_opacity(0.7)
|
223 |
-
text_clips.append(txt_clip)
|
224 |
-
else:
|
225 |
-
raise ValueError(f"Invalid entry format: {entry}")
|
226 |
-
|
227 |
-
# Overlay all text clips on the original video
|
228 |
-
final_video = CompositeVideoClip([video] + text_clips)
|
229 |
-
|
230 |
-
# Write the result to a file
|
231 |
-
final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
|
232 |
-
|
233 |
-
# Mock functions for platform actions and analytics
|
234 |
-
def mock_post_to_platform(platform, content_title):
|
235 |
-
return f"Content '{content_title}' successfully posted on {platform}!"
|
236 |
-
|
237 |
-
def mock_analytics():
|
238 |
-
return {
|
239 |
-
"YouTube": {"Views": random.randint(1000, 5000), "Engagement Rate": f"{random.uniform(5, 15):.2f}%"},
|
240 |
-
"Instagram": {"Views": random.randint(500, 3000), "Engagement Rate": f"{random.uniform(10, 20):.2f}%"},
|
241 |
-
}
|
242 |
|
243 |
def update_translations(file, edited_table):
|
244 |
"""
|
@@ -262,7 +212,7 @@ def update_translations(file, edited_table):
|
|
262 |
]
|
263 |
|
264 |
# Call the function to process the video with updated translations
|
265 |
-
|
266 |
|
267 |
# Calculate elapsed time
|
268 |
elapsed_time = time.time() - start_time
|
@@ -273,27 +223,96 @@ def update_translations(file, edited_table):
|
|
273 |
except Exception as e:
|
274 |
raise ValueError(f"Error updating translations: {e}")
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
def generate_voiceover(translated_json, language, output_audio_path):
|
277 |
-
|
|
|
|
|
278 |
# Concatenate translated text into a single string
|
279 |
full_text = " ".join(entry["translated"] for entry in translated_json)
|
280 |
-
|
281 |
-
# Generate speech
|
282 |
tts = gTTS(text=full_text, lang=language)
|
283 |
tts.save(output_audio_path)
|
284 |
|
285 |
def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_path: str):
|
|
|
|
|
|
|
286 |
try:
|
287 |
# Load the video file
|
288 |
logger.info(f"Loading video from: {video_path}")
|
289 |
video = VideoFileClip(video_path)
|
290 |
-
|
291 |
# Load the new audio file
|
292 |
logger.info(f"Loading audio from: {new_audio_path}")
|
293 |
new_audio = AudioFileClip(new_audio_path)
|
294 |
-
|
295 |
-
#
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
# Set the new audio to the video
|
299 |
logger.info("Replacing video audio...")
|
@@ -304,10 +323,24 @@ def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_pat
|
|
304 |
video.write_videofile(final_video_path, codec="libx264", audio_codec="aac")
|
305 |
|
306 |
logger.info("Video processing completed successfully.")
|
307 |
-
|
308 |
except Exception as e:
|
309 |
logger.error(f"Error replacing audio in video: {e}")
|
310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
def upload_and_manage(file, target_language, mode="transcription"):
|
312 |
if file is None:
|
313 |
logger.info("No file uploaded. Please upload a video/audio file.")
|
@@ -337,17 +370,9 @@ def upload_and_manage(file, target_language, mode="transcription"):
|
|
337 |
|
338 |
# Step 3: Add transcript to video based on timestamps
|
339 |
logger.info("Adding translated transcript to video...")
|
340 |
-
|
341 |
logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
|
342 |
|
343 |
-
# Step 4 (Optional): Generate voiceover if mode is "Transcription with Voiceover"
|
344 |
-
if mode == "Transcription with Voiceover":
|
345 |
-
logger.info("Generating voiceover for video...")
|
346 |
-
generate_voiceover(translated_json, target_language, voiceover_path)
|
347 |
-
logger.info("Voiceover generated. Replacing audio in video...")
|
348 |
-
replace_audio_in_video(output_video_path, voiceover_path, output_video_path)
|
349 |
-
logger.info("Audio replaced in video.")
|
350 |
-
|
351 |
# Convert translated JSON into a format for the editable table
|
352 |
logger.info("Converting translated JSON into editable table format...")
|
353 |
editable_table = [
|
|
|
189 |
# Return the translated timestamps as a JSON string
|
190 |
return translated_json
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
def update_translations(file, edited_table):
|
194 |
"""
|
|
|
212 |
]
|
213 |
|
214 |
# Call the function to process the video with updated translations
|
215 |
+
add_transcript_voiceover(file.name, updated_translations, output_video_path)
|
216 |
|
217 |
# Calculate elapsed time
|
218 |
elapsed_time = time.time() - start_time
|
|
|
223 |
except Exception as e:
|
224 |
raise ValueError(f"Error updating translations: {e}")
|
225 |
|
226 |
def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover=False, target_language="en"):
    """
    Add a translated subtitle track (and optionally a per-segment voiceover)
    to a video.

    Args:
        video_path: path of the input video file.
        translated_json: list of dicts with numeric "start"/"end" timestamps
            and a "translated" text field (schema assumed from callers —
            TODO confirm).
        output_path: path where the rendered video is written.
        add_voiceover: when True, synthesize speech for each segment and use
            it as the video's audio track in place of the original audio.
        target_language: language code passed to the TTS engine.

    Raises:
        ValueError: if any entry is not a dict carrying a "translated" key.
    """
    # Load the video file
    video = VideoFileClip(video_path)

    # Subtitle clips (each optionally carrying its own voiceover audio)
    text_clips = []

    # Relative font size based on video height, and wrap width at 80% of
    # the frame so long captions break onto multiple lines.
    subtitle_font_size = int(video.h // 15)
    max_subtitle_width = int(video.w * 0.8)

    font_path = "./NotoSansSC-Regular.ttf"

    for i, entry in enumerate(translated_json):
        logger.debug(f"Processing entry {i}: {entry}")

        # Ensure `entry` is a dictionary with keys "start", "end", and "translated"
        if not (isinstance(entry, dict) and "translated" in entry):
            raise ValueError(f"Invalid entry format: {entry}")

        segment_duration = entry["end"] - entry["start"]

        # Create the subtitle clip for this segment.
        # NOTE: moviepy v2 spells the keyword `font_size` (not `fontsize`).
        txt_clip = TextClip(
            text=entry["translated"],
            font=font_path,
            method='caption',
            color='yellow',
            font_size=subtitle_font_size,
            size=(max_subtitle_width, None)
        ).with_start(entry["start"]).with_duration(segment_duration).with_position(('bottom')).with_opacity(0.7)

        # Generate voiceover for this segment, if needed, and attach it to
        # the subtitle clip: CompositeVideoClip then composites the audio of
        # all component clips at their start times, so no manual audio
        # concatenation (or silent filler file) is required.
        if add_voiceover:
            segment_audio_path = f"segment_{i}_voiceover.wav"
            generate_voiceover([entry], target_language, segment_audio_path)
            seg_audio = AudioFileClip(segment_audio_path)
            # Clamp to the segment window, but never past the actual TTS
            # audio length (which may be shorter than the subtitle window).
            seg_audio = seg_audio.subclipped(0, min(segment_duration, seg_audio.duration))
            txt_clip = txt_clip.with_audio(seg_audio)

        text_clips.append(txt_clip)

    # When a voiceover replaces the original soundtrack, strip the source
    # audio so only the synthesized segments are heard.
    base_clip = video.without_audio() if add_voiceover else video

    # Overlay all subtitle clips on the (possibly muted) original video.
    final_video = CompositeVideoClip([base_clip] + text_clips)

    # Write the result to a file
    logger.info(f"Saving the final video to: {output_path}")
    final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")

    logger.info("Video processing completed successfully.")
def generate_voiceover(translated_json, language, output_audio_path):
    """
    Synthesize a voiceover from translated segments and save it to a file.

    Args:
        translated_json: iterable of dicts each carrying a "translated" text field.
        language: language code accepted by the TTS engine.
        output_audio_path: destination path for the rendered speech audio.
    """
    # Gather every segment's translated text, then merge into one utterance.
    pieces = [segment["translated"] for segment in translated_json]
    combined_text = " ".join(pieces)

    # Run text-to-speech and persist the result to disk.
    speech = gTTS(text=combined_text, lang=language)
    speech.save(output_audio_path)
294 |
def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_path: str):
|
295 |
+
"""
|
296 |
+
Replace the audio in the video with the provided new audio.
|
297 |
+
"""
|
298 |
try:
|
299 |
# Load the video file
|
300 |
logger.info(f"Loading video from: {video_path}")
|
301 |
video = VideoFileClip(video_path)
|
302 |
+
|
303 |
# Load the new audio file
|
304 |
logger.info(f"Loading audio from: {new_audio_path}")
|
305 |
new_audio = AudioFileClip(new_audio_path)
|
306 |
+
|
307 |
+
# Ensure the audio matches the video's duration
|
308 |
+
audio_duration = new_audio.duration
|
309 |
+
video_duration = video.duration
|
310 |
+
if audio_duration < video_duration:
|
311 |
+
logger.info(f"Audio is shorter than video. Looping audio to match video duration.")
|
312 |
+
new_audio = new_audio.fx("audio_loop", duration=video_duration)
|
313 |
+
elif audio_duration > video_duration:
|
314 |
+
logger.info(f"Audio is longer than video. Truncating audio.")
|
315 |
+
new_audio = new_audio.subclip(0, video_duration)
|
316 |
|
317 |
# Set the new audio to the video
|
318 |
logger.info("Replacing video audio...")
|
|
|
323 |
video.write_videofile(final_video_path, codec="libx264", audio_codec="aac")
|
324 |
|
325 |
logger.info("Video processing completed successfully.")
|
|
|
326 |
except Exception as e:
|
327 |
logger.error(f"Error replacing audio in video: {e}")
|
328 |
|
329 |
def check_for_time_gaps(translated_json):
    """
    Normalize segment timestamps so consecutive segments never overlap.

    Scans the segments in order; whenever a segment starts before the
    previous one ends, its start is clamped to the previous segment's end.
    (Despite the function name, the condition detects *overlaps* — true
    gaps, where the next segment starts late, are left untouched.)

    Args:
        translated_json: list of dicts, each with numeric "start" and "end".

    Returns:
        The same list (modified in place) with overlapping starts adjusted.
    """
    for i in range(1, len(translated_json)):
        prev_end = translated_json[i - 1]["end"]
        curr_start = translated_json[i]["start"]

        if prev_end > curr_start:
            # The previous segment runs past this one's start — an overlap,
            # not a gap; the old message mislabeled it.  Clamp the start so
            # subtitles/voiceover segments don't collide.
            logger.warning(f"Found overlap between segments at {i}. Adjusting timestamps.")
            translated_json[i]["start"] = prev_end  # You can adjust this to smooth the transition

    return translated_json
344 |
def upload_and_manage(file, target_language, mode="transcription"):
|
345 |
if file is None:
|
346 |
logger.info("No file uploaded. Please upload a video/audio file.")
|
|
|
370 |
|
371 |
# Step 3: Add transcript to video based on timestamps
|
372 |
logger.info("Adding translated transcript to video...")
|
373 |
+
add_transcript_voiceover(video_path, translated_json, output_path, mode == "Transcription with Voiceover", target_language)
|
374 |
logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
|
375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
# Convert translated JSON into a format for the editable table
|
377 |
logger.info("Converting translated JSON into editable table format...")
|
378 |
editable_table = [
|