Update app.py
app.py
@@ -325,7 +325,7 @@ def translate_text(transcription_json, source_language, target_language):
 
     return translated_json
 
-def update_translations(file, edited_table, mode):
+def update_translations(file, edited_table, process_mode):
     """
     Update the translations based on user edits in the Gradio Dataframe.
     """
@@ -351,7 +351,7 @@ def update_translations(file, edited_table, mode):
     ]
 
     # Call the function to process the video with updated translations
-    add_transcript_voiceover(file.name, updated_translations, output_video_path, mode)
+    add_transcript_voiceover(file.name, updated_translations, output_video_path, process_mode)
 
     # Calculate elapsed time
     elapsed_time = time.time() - start_time
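Taken together, the first two hunks replace the old mode argument with an integer process_mode and pass it straight through to add_transcript_voiceover. Judging from the gr.Radio choices later in this diff, the values appear to mean 1 = transcription only, 2 = transcription with premium voice, 3 = transcription with voice clone. A minimal sketch of that convention (the constant and function names here are illustrative, not from app.py):

# Illustrative names only; app.py compares bare integers.
TRANSCRIPTION_ONLY = 1   # subtitles, no generated audio
PREMIUM_VOICE = 2        # subtitles + premium TTS voiceover
VOICE_CLONE = 3          # subtitles + XTTS voice cloning

def needs_voiceover(process_mode: int) -> bool:
    # Mirrors the process_mode > 1 checks added below.
    return process_mode > 1

def needs_xtts(process_mode: int) -> bool:
    # Mirrors the process_mode == 3 check that guards model loading.
    return process_mode == 3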
@@ -408,7 +408,7 @@ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_height,
         logger.error(f"\u274c Failed to create subtitle clip: {e}")
         return None
 
-def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover, target_language, font_path, speaker_sample_paths=None):
+def process_entry(entry, i, tts_model, video_width, video_height, process_mode, target_language, font_path, use_clone, speaker_sample_paths=None):
     logger.debug(f"Processing entry {i}: {entry}")
     error_message = None
 
@@ -420,7 +420,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
     txt_clip = None
 
     audio_segment = None
-    if add_voiceover:
+    if process_mode > 1:
         try:
             segment_audio_path = f"segment_{i}_voiceover.wav"
             desired_duration = entry["end"] - entry["start"]
@@ -455,7 +455,7 @@ def process_entry(entry, i, tts_model, video_width, video_height, add_voiceover,
 
     return i, txt_clip, audio_segment, error_message
 
-def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover, target_language="en", speaker_sample_paths=None):
+def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, use_clone=False):
     video = VideoFileClip(video_path)
     font_path = "./NotoSansSC-Regular.ttf"
 
@@ -463,7 +463,8 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover,
     audio_segments = []
     error_messages = []
 
-    if add_voiceover:
+    if process_mode == 3:
+        global tts_model
         if tts_model is None:
             try:
                 print("🔄 Loading XTTS model...")
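The added global tts_model line is the substantive fix in this hunk: without it, assigning tts_model inside the function would bind a new local name, the module-level cache (initialized to None at the bottom of app.py) would never be filled, and the XTTS model would reload on every call. A minimal sketch of the lazy-load pattern, assuming the Coqui TTS API (the actual loading code is outside this diff):

from TTS.api import TTS  # assumed dependency; only the print() is visible here

tts_model = None  # module-level cache

def ensure_tts_model():
    global tts_model           # write to the module-level name, not a local
    if tts_model is None:      # load only on first use
        print("🔄 Loading XTTS model...")
        tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    return tts_model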
@@ -476,7 +477,7 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover,
     ## Need to implmenet backup option.
 
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, add_voiceover, target_language, font_path, speaker_sample_paths)
+        futures = [executor.submit(process_entry, entry, i, tts_model, video.w, video.h, process_mode, target_language, font_path, use_clone, speaker_sample_paths)
                    for i, entry in enumerate(translated_json)]
 
     results = []
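Only the submit arguments change here; the surrounding pattern stays the same: fan out one process_entry task per subtitle segment, collect the futures, then restore segment order using the index each task returns (the results.sort in the next hunk). A self-contained sketch of that fan-out/fan-in shape:

import concurrent.futures

def run_indexed(entries, worker):
    # worker(entry, i) returns a tuple whose first element is i,
    # so completion order does not matter.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(worker, entry, i)
                   for i, entry in enumerate(entries)]
        results = [f.result()
                   for f in concurrent.futures.as_completed(futures)]
    results.sort(key=lambda r: r[0])  # re-establish entry order
    return results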
@@ -494,12 +495,12 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover,
     # Sort by entry index to ensure order
     results.sort(key=lambda x: x[0])
     text_clips = [clip for _, clip, _ in results if clip]
-    if add_voiceover:
+    if process_mode>1:
         audio_segments = [segment for _, _, segment in results if segment]
 
     final_video = CompositeVideoClip([video] + text_clips)
 
-    if add_voiceover and audio_segments:
+    if process_mode>1 and audio_segments:
         try:
             voice_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
 
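One caution about the unchanged context lines in this hunk: process_entry returns a 4-tuple (i, txt_clip, audio_segment, error_message), yet both comprehensions unpack three values (for _, clip, _ in results), which would raise ValueError at runtime unless results is reshaped before this point. If the 4-tuples reach this code as-is, the comprehensions would need a fourth slot, e.g.:

text_clips = [clip for _, clip, _, _ in results if clip]
audio_segments = [seg for _, _, seg, _ in results if seg]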
@@ -643,7 +644,7 @@ def upload_and_manage(file, target_language, mode="transcription"):
 
     # Step 3: Add transcript to video based on timestamps
     logger.info("Adding translated transcript to video...")
-    add_transcript_voiceover(file.name, translated_json, output_video_path, mode, target_language)
+    add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language)
     logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
 
     # Convert translated JSON into a format for the editable table
@@ -671,8 +672,8 @@ def build_interface():
     with gr.Row():
         with gr.Column(scale=4):
             file_input = gr.File(label="Upload Video/Audio File")
-            language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language")  # Language codes
-            process_mode = gr.Radio(choices=["Transcription", "Transcription with Voiceover"], label="Choose Processing Type")
+            language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language")  # Language codes
+            process_mode = gr.Radio(choices=[("Transcription Only", 1),("Transcription with Premium Voice",2),("Transcription with Voice Clone", 3)],label="Choose Processing Type",value=1)
             submit_button = gr.Button("Post and Process")
 
         with gr.Column(scale=8):
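The reworked Radio uses Gradio's (label, value) choice tuples: the UI shows the label while the event handler receives the integer value, which is what lets the rest of app.py compare process_mode against 1, 2, and 3 directly. A small self-contained sketch of the behavior (the wiring below is illustrative; the real click handler is outside this diff):

import gradio as gr

with gr.Blocks() as demo:
    process_mode = gr.Radio(
        choices=[("Transcription Only", 1),
                 ("Transcription with Premium Voice", 2),
                 ("Transcription with Voice Clone", 3)],
        label="Choose Processing Type",
        value=1,  # default: transcription only
    )
    out = gr.Textbox()
    # The handler receives the selected choice's value (an int), not its label.
    process_mode.change(lambda m: f"mode={m}", inputs=process_mode, outputs=out)

demo.launch()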
@@ -733,7 +734,6 @@ def build_interface():
     return demo
 
 tts_model = None
-global tts_model
 # Launch the Gradio interface
 demo = build_interface()
 demo.launch()
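Finally, dropping global tts_model at module scope is correct: a global declaration only has meaning inside a function, where it marks assignments as targeting the module-level name; at the top level it is a no-op, since every module-level name is already global. The declaration now lives where the assignment actually happens, inside add_transcript_voiceover (the hunk at line 463 above; the ensure_tts_model sketch earlier shows the same pattern).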