Update app.py
app.py CHANGED
@@ -325,7 +325,7 @@ def translate_text(transcription_json, source_language, target_language):
 
     return translated_json
 
-def update_translations(file, edited_table, mode):
+def update_translations(file, edited_table, process_mode):
     """
     Update the translations based on user edits in the Gradio Dataframe.
     """
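The renamed `process_mode` parameter carries an integer rather than the old `mode` flag: per the Radio choices added near the end of this diff, 1 means transcription only, 2 adds a premium TTS voiceover, and 3 adds a cloned voiceover. A minimal sketch of that convention (the `ProcessMode` enum is illustrative, not part of the commit):

    from enum import IntEnum

    class ProcessMode(IntEnum):
        TRANSCRIPTION = 1   # subtitles only
        PREMIUM_VOICE = 2   # subtitles + synthesized voiceover
        VOICE_CLONE = 3     # subtitles + voiceover cloned from a speaker sample

    # Later hunks test `process_mode > 1` ("generate audio") and
    # `process_mode == 3` ("load the XTTS clone model").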
@@ -351,7 +351,7 @@ def update_translations(file, edited_table, mode):
     ]
 
     # Call the function to process the video with updated translations
-    add_transcript_voiceover(file.name, updated_translations, output_video_path, …
+    add_transcript_voiceover(file.name, updated_translations, output_video_path, process_mode)
 
     # Calculate elapsed time
     elapsed_time = time.time() - start_time
@@ -408,7 +408,7 @@ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_heig
         logger.error(f"\u274c Failed to create subtitle clip: {e}")
         return None
 
-def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover, …
+def process_entry(entry, i, tts_model, video_width, video_height, process_mode, target_language, font_path, use_clone, speaker_sample_paths=None):
     logger.debug(f"Processing entry {i}: {entry}")
     error_message = None
 
@@ -420,7 +420,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
     txt_clip = None
 
     audio_segment = None
-    if …
+    if process_mode > 1:
         try:
             segment_audio_path = f"segment_{i}_voiceover.wav"
             desired_duration = entry["end"] - entry["start"]
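After this change, process_entry can be exercised on its own; a hedged sketch of a call in subtitle-only mode (the sample segment and paths are invented, and tts_model may be None because the process_mode > 1 branch is skipped):

    entry = {"start": 0.0, "end": 2.0, "text": "Hello"}   # invented sample segment
    i, txt_clip, audio_segment, err = process_entry(
        entry, 0, None,                  # no TTS model needed in mode 1
        video_width=1280, video_height=720,
        process_mode=1,                  # subtitles only: audio branch not taken
        target_language="en",
        font_path="./NotoSansSC-Regular.ttf",
        use_clone=False,
    )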
@@ -455,7 +455,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
 
     return i, txt_clip, audio_segment, error_message
 
-def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover, …
+def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, use_clone=False):
     video = VideoFileClip(video_path)
     font_path = "./NotoSansSC-Regular.ttf"
 
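A hedged example of the new call shape (the paths and sample segment here are invented; the two real call sites appear in the update_translations and upload_and_manage hunks):

    translated_json = [{"start": 0.0, "end": 2.0, "text": "你好"}]  # invented sample
    add_transcript_voiceover(
        "input.mp4",                         # hypothetical source video
        translated_json,
        "output.mp4",                        # hypothetical output path
        process_mode=3,                      # mode 3 triggers the XTTS load below
        target_language="zh",
        speaker_sample_paths="speaker.wav",  # invented speaker sample
        use_clone=True,
    )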
@@ -463,7 +463,8 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
     audio_segments = []
     error_messages = []
 
-    if …
+    if process_mode == 3:
+        global tts_model
         if tts_model is None:
             try:
                 print("🔄 Loading XTTS model...")
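This is the classic lazy-initialization pattern for an expensive global: declare `global tts_model` inside the function that assigns it, and load only on first use, and only when the mode requires it. A self-contained sketch (ensure_tts_model and load_xtts are illustrative placeholders, not part of the commit):

    tts_model = None  # module level, as at the bottom of app.py

    def ensure_tts_model():
        """Illustrative helper: load XTTS once, on first use."""
        global tts_model              # required because the function rebinds the name
        if tts_model is None:
            print("🔄 Loading XTTS model...")
            tts_model = load_xtts()   # placeholder for the real XTTS loader
        return tts_model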
@@ -476,7 +477,7 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
     ## Need to implement a backup option.
 
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, …
+        futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, process_mode, target_language, font_path, use_clone, speaker_sample_paths)
                    for i, entry in enumerate(translated_json)]
 
     results = []
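Futures may complete in any order, which is why the next hunk re-sorts results by the index that process_entry returns as its first element. The same fan-out/reorder pattern in a self-contained sketch:

    import concurrent.futures

    def work(i, item):
        return i, item.upper()        # stands in for process_entry's (index, ...) tuple

    items = ["a", "b", "c"]
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(work, i, item) for i, item in enumerate(items)]
        results = [f.result() for f in concurrent.futures.as_completed(futures)]
    results.sort(key=lambda r: r[0])  # restore submission order, as app.py does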
@@ -494,12 +495,12 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
     # Sort by entry index to ensure order
     results.sort(key=lambda x: x[0])
     text_clips = [clip for _, clip, _ in results if clip]
-    if …
+    if process_mode > 1:
         audio_segments = [segment for _, _, segment in results if segment]
 
     final_video = CompositeVideoClip([video] + text_clips)
 
-    if …
+    if process_mode > 1 and audio_segments:
         try:
             voice_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
 
@@ -643,7 +644,7 @@ def upload_and_manage(file, target_language, mode="transcription"):
 
     # Step 3: Add transcript to video based on timestamps
     logger.info("Adding translated transcript to video...")
-    add_transcript_voiceover(file.name, translated_json, output_video_path, …
+    add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language)
     logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
 
     # Convert translated JSON into a format for the editable table
@@ -671,8 +672,8 @@ def build_interface():
     with gr.Row():
         with gr.Column(scale=4):
             file_input = gr.File(label="Upload Video/Audio File")
-            language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language")  # Language codes
-            process_mode = gr.Radio(choices=["Transcription", "Transcription with …
+            language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language")  # Language codes
+            process_mode = gr.Radio(choices=[("Transcription Only", 1), ("Transcription with Premium Voice", 2), ("Transcription with Voice Clone", 3)], label="Choose Processing Type", value=1)
             submit_button = gr.Button("Post and Process")
 
         with gr.Column(scale=8):
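gr.Radio accepts (label, value) pairs, so the UI shows readable labels while downstream code receives the integer 1, 2, or 3 directly. A minimal sketch of that behavior (assuming a recent Gradio release that supports tuple choices; the report callback is invented):

    import gradio as gr

    def report(mode):
        return f"process_mode = {mode}"   # 1, 2, or 3, not the label text

    with gr.Blocks() as demo:
        process_mode = gr.Radio(
            choices=[("Transcription Only", 1),
                     ("Transcription with Premium Voice", 2),
                     ("Transcription with Voice Clone", 3)],
            label="Choose Processing Type",
            value=1,
        )
        out = gr.Textbox(label="Result")
        process_mode.change(report, inputs=process_mode, outputs=out)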
@@ -733,7 +734,6 @@ def build_interface():
     return demo
 
 tts_model = None
-global tts_model
 # Launch the Gradio interface
 demo = build_interface()
 demo.launch()
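Dropping the module-level `global tts_model` is a small correctness cleanup: at module scope every name is already global, so the statement had no effect there. `global` only matters inside a function that rebinds the name, which is exactly where this commit now declares it (the `process_mode == 3` branch of `add_transcript_voiceover`).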