qqwjq1981 committed on
Commit
aa415e7
·
verified ·
1 Parent(s): 4cba5c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -325,7 +325,7 @@ def translate_text(transcription_json, source_language, target_language):
325
 
326
  return translated_json
327
 
328
- def update_translations(file, edited_table, mode):
329
  """
330
  Update the translations based on user edits in the Gradio Dataframe.
331
  """
@@ -351,7 +351,7 @@ def update_translations(file, edited_table, mode):
351
  ]
352
 
353
  # Call the function to process the video with updated translations
354
- add_transcript_voiceover(file.name, updated_translations, output_video_path, mode=="Transcription with Voiceover")
355
 
356
  # Calculate elapsed time
357
  elapsed_time = time.time() - start_time
@@ -408,7 +408,7 @@ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_heig
408
  logger.error(f"\u274c Failed to create subtitle clip: {e}")
409
  return None
410
 
411
- def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover, target_language, font_path, use_clone, speaker_sample_paths=None):
412
  logger.debug(f"Processing entry {i}: {entry}")
413
  error_message = None
414
 
@@ -420,7 +420,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
420
  txt_clip = None
421
 
422
  audio_segment = None
423
- if add_voiceover:
424
  try:
425
  segment_audio_path = f"segment_{i}_voiceover.wav"
426
  desired_duration = entry["end"] - entry["start"]
@@ -455,7 +455,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
455
 
456
  return i, txt_clip, audio_segment, error_message
457
 
458
- def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover=False, target_language="en", speaker_sample_paths=None, use_clone=False):
459
  video = VideoFileClip(video_path)
460
  font_path = "./NotoSansSC-Regular.ttf"
461
 
@@ -463,7 +463,8 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
463
  audio_segments = []
464
  error_messages = []
465
 
466
- if use_clone:
 
467
  if tts_model is None:
468
  try:
469
  print("🔄 Loading XTTS model...")
@@ -476,7 +477,7 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
476
 ## Need to implement backup option.
477
 
478
  with concurrent.futures.ThreadPoolExecutor() as executor:
479
- futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, add_voiceover, target_language, font_path, use_clone, speaker_sample_paths)
480
  for i, entry in enumerate(translated_json)]
481
 
482
  results = []
@@ -494,12 +495,12 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
494
  # Sort by entry index to ensure order
495
  results.sort(key=lambda x: x[0])
496
  text_clips = [clip for _, clip, _ in results if clip]
497
- if add_voiceover:
498
  audio_segments = [segment for _, _, segment in results if segment]
499
 
500
  final_video = CompositeVideoClip([video] + text_clips)
501
 
502
- if add_voiceover and audio_segments:
503
  try:
504
  voice_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
505
 
@@ -643,7 +644,7 @@ def upload_and_manage(file, target_language, mode="transcription"):
643
 
644
  # Step 3: Add transcript to video based on timestamps
645
  logger.info("Adding translated transcript to video...")
646
- add_transcript_voiceover(file.name, translated_json, output_video_path, mode == "Transcription with Voiceover", target_language)
647
  logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
648
 
649
  # Convert translated JSON into a format for the editable table
@@ -671,8 +672,8 @@ def build_interface():
671
  with gr.Row():
672
  with gr.Column(scale=4):
673
  file_input = gr.File(label="Upload Video/Audio File")
674
- language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language") # Language codes
675
- process_mode = gr.Radio(choices=["Transcription", "Transcription with Voiceover"], label="Choose Processing Type", value="Transcription")
676
  submit_button = gr.Button("Post and Process")
677
 
678
  with gr.Column(scale=8):
@@ -733,7 +734,6 @@ def build_interface():
733
  return demo
734
 
735
  tts_model = None
736
- global tts_model
737
  # Launch the Gradio interface
738
  demo = build_interface()
739
  demo.launch()
 
325
 
326
  return translated_json
327
 
328
+ def update_translations(file, edited_table, process_mode):
329
  """
330
  Update the translations based on user edits in the Gradio Dataframe.
331
  """
 
351
  ]
352
 
353
  # Call the function to process the video with updated translations
354
+ add_transcript_voiceover(file.name, updated_translations, output_video_path, process_mode)
355
 
356
  # Calculate elapsed time
357
  elapsed_time = time.time() - start_time
 
408
  logger.error(f"\u274c Failed to create subtitle clip: {e}")
409
  return None
410
 
411
+ def process_entry(entry, i, tts_model, video_width, video_height, process_mode, target_language, font_path, use_clone, speaker_sample_paths=None):
412
  logger.debug(f"Processing entry {i}: {entry}")
413
  error_message = None
414
 
 
420
  txt_clip = None
421
 
422
  audio_segment = None
423
+ if process_mode > 1:
424
  try:
425
  segment_audio_path = f"segment_{i}_voiceover.wav"
426
  desired_duration = entry["end"] - entry["start"]
 
455
 
456
  return i, txt_clip, audio_segment, error_message
457
 
458
+ def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, use_clone=False):
459
  video = VideoFileClip(video_path)
460
  font_path = "./NotoSansSC-Regular.ttf"
461
 
 
463
  audio_segments = []
464
  error_messages = []
465
 
466
+ if process_mode == 3:
467
+ global tts_model
468
  if tts_model is None:
469
  try:
470
  print("🔄 Loading XTTS model...")
 
477
 ## Need to implement backup option.
478
 
479
  with concurrent.futures.ThreadPoolExecutor() as executor:
480
+ futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, process_mode, target_language, font_path, use_clone, speaker_sample_paths)
481
  for i, entry in enumerate(translated_json)]
482
 
483
  results = []
 
495
  # Sort by entry index to ensure order
496
  results.sort(key=lambda x: x[0])
497
  text_clips = [clip for _, clip, _ in results if clip]
498
+ if process_mode>1:
499
  audio_segments = [segment for _, _, segment in results if segment]
500
 
501
  final_video = CompositeVideoClip([video] + text_clips)
502
 
503
+ if process_mode>1 and audio_segments:
504
  try:
505
  voice_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
506
 
 
644
 
645
  # Step 3: Add transcript to video based on timestamps
646
  logger.info("Adding translated transcript to video...")
647
+ add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language)
648
  logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
649
 
650
  # Convert translated JSON into a format for the editable table
 
672
  with gr.Row():
673
  with gr.Column(scale=4):
674
  file_input = gr.File(label="Upload Video/Audio File")
675
+ language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language") # Language codes
676
+ process_mode = gr.Radio(choices=[("Transcription Only", 1),("Transcription with Premium Voice",2),("Transcription with Voice Clone", 3)],label="Choose Processing Type",value=1)
677
  submit_button = gr.Button("Post and Process")
678
 
679
  with gr.Column(scale=8):
 
734
  return demo
735
 
736
  tts_model = None
 
737
  # Launch the Gradio interface
738
  demo = build_interface()
739
  demo.launch()