Update app.py
app.py
@@ -325,7 +325,7 @@ def translate_text(transcription_json, source_language, target_language):
 
     return translated_json
 
-def update_translations(file, edited_table, mode):
+def update_translations(file, edited_table, process_mode):
     """
     Update the translations based on user edits in the Gradio Dataframe.
     """
@@ -351,7 +351,7 @@ def update_translations(file, edited_table, mode):
     ]
 
     # Call the function to process the video with updated translations
-    add_transcript_voiceover(file.name, updated_translations, output_video_path, mode)
+    add_transcript_voiceover(file.name, updated_translations, output_video_path, process_mode)
 
     # Calculate elapsed time
     elapsed_time = time.time() - start_time
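Taken together, the first two hunks replace the old mode argument with an integer process_mode and pass it straight through to add_transcript_voiceover. Judging from the gr.Radio choices later in this diff, the values appear to mean 1 = transcription only, 2 = transcription with premium voice, 3 = transcription with voice clone. A minimal sketch of that convention (the constant and function names here are illustrative, not from app.py):

# Illustrative names only; app.py compares bare integers.
TRANSCRIPTION_ONLY = 1   # subtitles, no generated audio
PREMIUM_VOICE = 2        # subtitles + premium TTS voiceover
VOICE_CLONE = 3          # subtitles + XTTS voice cloning

def needs_voiceover(process_mode: int) -> bool:
    # Mirrors the process_mode > 1 checks added below.
    return process_mode > 1

def needs_xtts(process_mode: int) -> bool:
    # Mirrors the process_mode == 3 check that guards model loading.
    return process_mode == 3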
@@ -408,7 +408,7 @@ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_height,
         logger.error(f"\u274c Failed to create subtitle clip: {e}")
         return None
 
-def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover, target_language, font_path, speaker_sample_paths=None):
+def process_entry(entry, i, tts_model, video_width, video_height, process_mode, target_language, font_path, use_clone, speaker_sample_paths=None):
     logger.debug(f"Processing entry {i}: {entry}")
     error_message = None
 
@@ -420,7 +420,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
     txt_clip = None
 
     audio_segment = None
-    if add_voiceover:
+    if process_mode > 1:
         try:
             segment_audio_path = f"segment_{i}_voiceover.wav"
             desired_duration = entry["end"] - entry["start"]
@@ -455,7 +455,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
 
     return i, txt_clip, audio_segment, error_message
 
-def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover, target_language="en", speaker_sample_paths=None):
+def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, use_clone=False):
     video = VideoFileClip(video_path)
     font_path = "./NotoSansSC-Regular.ttf"
 
@@ -463,7 +463,8 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover,
     audio_segments = []
     error_messages = []
 
-    if add_voiceover:
+    if process_mode == 3:
+        global tts_model
         if tts_model is None:
             try:
                 print("🔄 Loading XTTS model...")
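The added global tts_model line is the substantive fix in this hunk: without it, assigning tts_model inside the function would bind a new local name, the module-level cache (initialized to None at the bottom of app.py) would never be filled, and the XTTS model would reload on every call. A minimal sketch of the lazy-load pattern, assuming the Coqui TTS API (the actual loading code is outside this diff):

from TTS.api import TTS  # assumed dependency; only the print() is visible here

tts_model = None  # module-level cache

def ensure_tts_model():
    global tts_model           # write to the module-level name, not a local
    if tts_model is None:      # load only on first use
        print("🔄 Loading XTTS model...")
        tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    return tts_model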
@@ -476,7 +477,7 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover,
     ## Need to implmenet backup option.
 
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, add_voiceover, target_language, font_path, speaker_sample_paths)
+        futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, process_mode, target_language, font_path, use_clone, speaker_sample_paths)
                    for i, entry in enumerate(translated_json)]
 
     results = []
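Only the submit arguments change here; the surrounding pattern stays the same: fan out one process_entry task per subtitle segment, collect the futures, then restore segment order using the index each task returns (the results.sort in the next hunk). A self-contained sketch of that fan-out/fan-in shape:

import concurrent.futures

def run_indexed(entries, worker):
    # worker(entry, i) returns a tuple whose first element is i,
    # so completion order does not matter.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(worker, entry, i)
                   for i, entry in enumerate(entries)]
        results = [f.result()
                   for f in concurrent.futures.as_completed(futures)]
    results.sort(key=lambda r: r[0])  # re-establish entry order
    return results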
@@ -494,12 +495,12 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover,
     # Sort by entry index to ensure order
     results.sort(key=lambda x: x[0])
     text_clips = [clip for _, clip, _ in results if clip]
-    if add_voiceover:
+    if process_mode>1:
         audio_segments = [segment for _, _, segment in results if segment]
 
     final_video = CompositeVideoClip([video] + text_clips)
 
-    if add_voiceover and audio_segments:
+    if process_mode>1 and audio_segments:
         try:
             voice_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
 
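One caution about the unchanged context lines in this hunk: process_entry returns a 4-tuple (i, txt_clip, audio_segment, error_message), yet both comprehensions unpack three values (for _, clip, _ in results), which would raise ValueError at runtime unless results is reshaped before this point. If the 4-tuples reach this code as-is, the comprehensions would need a fourth slot, e.g.:

text_clips = [clip for _, clip, _, _ in results if clip]
audio_segments = [seg for _, _, seg, _ in results if seg]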
@@ -643,7 +644,7 @@ def upload_and_manage(file, target_language, mode="transcription"):
 
     # Step 3: Add transcript to video based on timestamps
     logger.info("Adding translated transcript to video...")
-    add_transcript_voiceover(file.name, translated_json, output_video_path, mode, target_language)
+    add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language)
     logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
 
     # Convert translated JSON into a format for the editable table
@@ -671,8 +672,8 @@ def build_interface():
     with gr.Row():
         with gr.Column(scale=4):
             file_input = gr.File(label="Upload Video/Audio File")
-            language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language")  # Language codes
-            process_mode = gr.Radio(choices=["Transcription", "Transcription with Voiceover"], label="Choose Processing Type")
+            language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language")  # Language codes
+            process_mode = gr.Radio(choices=[("Transcription Only", 1),("Transcription with Premium Voice",2),("Transcription with Voice Clone", 3)],label="Choose Processing Type",value=1)
             submit_button = gr.Button("Post and Process")
 
         with gr.Column(scale=8):
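The reworked Radio uses Gradio's (label, value) choice tuples: the UI shows the label while the event handler receives the integer value, which is what lets the rest of app.py compare process_mode against 1, 2, and 3 directly. A small self-contained sketch of the behavior (the wiring below is illustrative; the real click handler is outside this diff):

import gradio as gr

with gr.Blocks() as demo:
    process_mode = gr.Radio(
        choices=[("Transcription Only", 1),
                 ("Transcription with Premium Voice", 2),
                 ("Transcription with Voice Clone", 3)],
        label="Choose Processing Type",
        value=1,  # default: transcription only
    )
    out = gr.Textbox()
    # The handler receives the selected choice's value (an int), not its label.
    process_mode.change(lambda m: f"mode={m}", inputs=process_mode, outputs=out)

demo.launch()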
@@ -733,7 +734,6 @@ def build_interface():
     return demo
 
 tts_model = None
-global tts_model
 # Launch the Gradio interface
 demo = build_interface()
 demo.launch()
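Finally, dropping global tts_model at module scope is correct: a global declaration only has meaning inside a function, where it marks assignments as targeting the module-level name; at the top level it is a no-op, since every module-level name is already global. The declaration now lives where the assignment actually happens, inside add_transcript_voiceover (the hunk at line 463 above; the ensure_tts_model sketch earlier shows the same pattern).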