Update app.py
Browse files
app.py
CHANGED
@@ -189,56 +189,6 @@ def translate_text(transcription_json, source_language, target_language):
|
|
189 |
# Return the translated timestamps as a JSON string
|
190 |
return translated_json
|
191 |
|
192 |
-
def add_transcript_to_video(video_path, translated_json, output_path):
|
193 |
-
# Load the video file
|
194 |
-
video = VideoFileClip(video_path)
|
195 |
-
|
196 |
-
# Create text clips based on timestamps
|
197 |
-
text_clips = []
|
198 |
-
|
199 |
-
logger.debug("Full translated_json: %s", translated_json)
|
200 |
-
|
201 |
-
# Define relative font size based on video height (adjust this value as necessary)
|
202 |
-
# Define relative font size based on video height (adjust this value as necessary)
|
203 |
-
subtitle_font_size = int(video.h // 15) # Ensure it's an integer
|
204 |
-
|
205 |
-
# Set maximum width for subtitle wrapping (80% of video width)
|
206 |
-
max_subtitle_width = int(video.w * 0.8) # Ensure it's an integer
|
207 |
-
|
208 |
-
font_path = "./NotoSansSC-Regular.ttf"
|
209 |
-
|
210 |
-
for entry in translated_json:
|
211 |
-
logger.debug("Processing entry: %s", entry)
|
212 |
-
|
213 |
-
# Ensure `entry` is a dictionary with keys "start", "end", and "translated"
|
214 |
-
if isinstance(entry, dict) and "translated" in entry:
|
215 |
-
txt_clip = TextClip(
|
216 |
-
text=entry["translated"],
|
217 |
-
font=font_path,
|
218 |
-
method='caption',
|
219 |
-
color='yellow',
|
220 |
-
font_size=subtitle_font_size, # Use relative font size
|
221 |
-
size=(max_subtitle_width, None) # Restrict the width to ensure wrapping
|
222 |
-
).with_start(entry["start"]).with_duration(entry["end"] - entry["start"]).with_position(('bottom')).with_opacity(0.7)
|
223 |
-
text_clips.append(txt_clip)
|
224 |
-
else:
|
225 |
-
raise ValueError(f"Invalid entry format: {entry}")
|
226 |
-
|
227 |
-
# Overlay all text clips on the original video
|
228 |
-
final_video = CompositeVideoClip([video] + text_clips)
|
229 |
-
|
230 |
-
# Write the result to a file
|
231 |
-
final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
|
232 |
-
|
233 |
-
# Mock functions for platform actions and analytics
|
234 |
-
def mock_post_to_platform(platform, content_title):
|
235 |
-
return f"Content '{content_title}' successfully posted on {platform}!"
|
236 |
-
|
237 |
-
def mock_analytics():
|
238 |
-
return {
|
239 |
-
"YouTube": {"Views": random.randint(1000, 5000), "Engagement Rate": f"{random.uniform(5, 15):.2f}%"},
|
240 |
-
"Instagram": {"Views": random.randint(500, 3000), "Engagement Rate": f"{random.uniform(10, 20):.2f}%"},
|
241 |
-
}
|
242 |
|
243 |
def update_translations(file, edited_table):
|
244 |
"""
|
@@ -262,7 +212,7 @@ def update_translations(file, edited_table):
|
|
262 |
]
|
263 |
|
264 |
# Call the function to process the video with updated translations
|
265 |
-
|
266 |
|
267 |
# Calculate elapsed time
|
268 |
elapsed_time = time.time() - start_time
|
@@ -273,27 +223,96 @@ def update_translations(file, edited_table):
|
|
273 |
except Exception as e:
|
274 |
raise ValueError(f"Error updating translations: {e}")
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
def generate_voiceover(translated_json, language, output_audio_path):
|
277 |
-
|
|
|
|
|
278 |
# Concatenate translated text into a single string
|
279 |
full_text = " ".join(entry["translated"] for entry in translated_json)
|
280 |
-
|
281 |
-
# Generate speech
|
282 |
tts = gTTS(text=full_text, lang=language)
|
283 |
tts.save(output_audio_path)
|
284 |
|
285 |
def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_path: str):
|
|
|
|
|
|
|
286 |
try:
|
287 |
# Load the video file
|
288 |
logger.info(f"Loading video from: {video_path}")
|
289 |
video = VideoFileClip(video_path)
|
290 |
-
|
291 |
# Load the new audio file
|
292 |
logger.info(f"Loading audio from: {new_audio_path}")
|
293 |
new_audio = AudioFileClip(new_audio_path)
|
294 |
-
|
295 |
-
#
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
# Set the new audio to the video
|
299 |
logger.info("Replacing video audio...")
|
@@ -304,10 +323,24 @@ def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_pat
|
|
304 |
video.write_videofile(final_video_path, codec="libx264", audio_codec="aac")
|
305 |
|
306 |
logger.info("Video processing completed successfully.")
|
307 |
-
|
308 |
except Exception as e:
|
309 |
logger.error(f"Error replacing audio in video: {e}")
|
310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
def upload_and_manage(file, target_language, mode="transcription"):
|
312 |
if file is None:
|
313 |
logger.info("No file uploaded. Please upload a video/audio file.")
|
@@ -337,17 +370,9 @@ def upload_and_manage(file, target_language, mode="transcription"):
|
|
337 |
|
338 |
# Step 3: Add transcript to video based on timestamps
|
339 |
logger.info("Adding translated transcript to video...")
|
340 |
-
|
341 |
logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
|
342 |
|
343 |
-
# Step 4 (Optional): Generate voiceover if mode is "Transcription with Voiceover"
|
344 |
-
if mode == "Transcription with Voiceover":
|
345 |
-
logger.info("Generating voiceover for video...")
|
346 |
-
generate_voiceover(translated_json, target_language, voiceover_path)
|
347 |
-
logger.info("Voiceover generated. Replacing audio in video...")
|
348 |
-
replace_audio_in_video(output_video_path, voiceover_path, output_video_path)
|
349 |
-
logger.info("Audio replaced in video.")
|
350 |
-
|
351 |
# Convert translated JSON into a format for the editable table
|
352 |
logger.info("Converting translated JSON into editable table format...")
|
353 |
editable_table = [
|
|
|
189 |
# Return the translated timestamps as a JSON string
|
190 |
return translated_json
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
def update_translations(file, edited_table):
|
194 |
"""
|
|
|
212 |
]
|
213 |
|
214 |
# Call the function to process the video with updated translations
|
215 |
+
add_transcript_voiceover(file.name, updated_translations, output_video_path)
|
216 |
|
217 |
# Calculate elapsed time
|
218 |
elapsed_time = time.time() - start_time
|
|
|
223 |
except Exception as e:
|
224 |
raise ValueError(f"Error updating translations: {e}")
|
225 |
|
226 |
def add_transcript_voiceover(video_path, translated_json, output_path, add_voiceover=False, target_language="en"):
    """
    Add a translated subtitle track (and optionally a per-segment voiceover)
    to a video.

    Args:
        video_path: path of the input video file.
        translated_json: list of dicts with numeric "start"/"end" timestamps
            and a "translated" text field (schema assumed from callers —
            TODO confirm).
        output_path: path where the rendered video is written.
        add_voiceover: when True, synthesize speech for each segment and use
            it as the video's audio track in place of the original audio.
        target_language: language code passed to the TTS engine.

    Raises:
        ValueError: if any entry is not a dict carrying a "translated" key.
    """
    # Load the video file
    video = VideoFileClip(video_path)

    # Subtitle clips (each optionally carrying its own voiceover audio)
    text_clips = []

    # Relative font size based on video height, and wrap width at 80% of
    # the frame so long captions break onto multiple lines.
    subtitle_font_size = int(video.h // 15)
    max_subtitle_width = int(video.w * 0.8)

    font_path = "./NotoSansSC-Regular.ttf"

    for i, entry in enumerate(translated_json):
        logger.debug(f"Processing entry {i}: {entry}")

        # Ensure `entry` is a dictionary with keys "start", "end", and "translated"
        if not (isinstance(entry, dict) and "translated" in entry):
            raise ValueError(f"Invalid entry format: {entry}")

        segment_duration = entry["end"] - entry["start"]

        # Create the subtitle clip for this segment.
        # NOTE: moviepy v2 spells the keyword `font_size` (not `fontsize`).
        txt_clip = TextClip(
            text=entry["translated"],
            font=font_path,
            method='caption',
            color='yellow',
            font_size=subtitle_font_size,
            size=(max_subtitle_width, None)
        ).with_start(entry["start"]).with_duration(segment_duration).with_position(('bottom')).with_opacity(0.7)

        # Generate voiceover for this segment, if needed, and attach it to
        # the subtitle clip: CompositeVideoClip then composites the audio of
        # all component clips at their start times, so no manual audio
        # concatenation (or silent filler file) is required.
        if add_voiceover:
            segment_audio_path = f"segment_{i}_voiceover.wav"
            generate_voiceover([entry], target_language, segment_audio_path)
            seg_audio = AudioFileClip(segment_audio_path)
            # Clamp to the segment window, but never past the actual TTS
            # audio length (which may be shorter than the subtitle window).
            seg_audio = seg_audio.subclipped(0, min(segment_duration, seg_audio.duration))
            txt_clip = txt_clip.with_audio(seg_audio)

        text_clips.append(txt_clip)

    # When a voiceover replaces the original soundtrack, strip the source
    # audio so only the synthesized segments are heard.
    base_clip = video.without_audio() if add_voiceover else video

    # Overlay all subtitle clips on the (possibly muted) original video.
    final_video = CompositeVideoClip([base_clip] + text_clips)

    # Write the result to a file
    logger.info(f"Saving the final video to: {output_path}")
    final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")

    logger.info("Video processing completed successfully.")
def generate_voiceover(translated_json, language, output_audio_path):
    """
    Synthesize a voiceover from translated segments and save it to a file.

    Args:
        translated_json: iterable of dicts each carrying a "translated" text field.
        language: language code accepted by the TTS engine.
        output_audio_path: destination path for the rendered speech audio.
    """
    # Gather every segment's translated text, then merge into one utterance.
    pieces = [segment["translated"] for segment in translated_json]
    combined_text = " ".join(pieces)

    # Run text-to-speech and persist the result to disk.
    speech = gTTS(text=combined_text, lang=language)
    speech.save(output_audio_path)
294 |
def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_path: str):
|
295 |
+
"""
|
296 |
+
Replace the audio in the video with the provided new audio.
|
297 |
+
"""
|
298 |
try:
|
299 |
# Load the video file
|
300 |
logger.info(f"Loading video from: {video_path}")
|
301 |
video = VideoFileClip(video_path)
|
302 |
+
|
303 |
# Load the new audio file
|
304 |
logger.info(f"Loading audio from: {new_audio_path}")
|
305 |
new_audio = AudioFileClip(new_audio_path)
|
306 |
+
|
307 |
+
# Ensure the audio matches the video's duration
|
308 |
+
audio_duration = new_audio.duration
|
309 |
+
video_duration = video.duration
|
310 |
+
if audio_duration < video_duration:
|
311 |
+
logger.info(f"Audio is shorter than video. Looping audio to match video duration.")
|
312 |
+
new_audio = new_audio.fx("audio_loop", duration=video_duration)
|
313 |
+
elif audio_duration > video_duration:
|
314 |
+
logger.info(f"Audio is longer than video. Truncating audio.")
|
315 |
+
new_audio = new_audio.subclip(0, video_duration)
|
316 |
|
317 |
# Set the new audio to the video
|
318 |
logger.info("Replacing video audio...")
|
|
|
323 |
video.write_videofile(final_video_path, codec="libx264", audio_codec="aac")
|
324 |
|
325 |
logger.info("Video processing completed successfully.")
|
|
|
326 |
except Exception as e:
|
327 |
logger.error(f"Error replacing audio in video: {e}")
|
328 |
|
329 |
def check_for_time_gaps(translated_json):
    """
    Normalize segment timestamps so consecutive segments never overlap.

    Scans the segments in order; whenever a segment starts before the
    previous one ends, its start is clamped to the previous segment's end.
    (Despite the function name, the condition detects *overlaps* — true
    gaps, where the next segment starts late, are left untouched.)

    Args:
        translated_json: list of dicts, each with numeric "start" and "end".

    Returns:
        The same list (modified in place) with overlapping starts adjusted.
    """
    for i in range(1, len(translated_json)):
        prev_end = translated_json[i - 1]["end"]
        curr_start = translated_json[i]["start"]

        if prev_end > curr_start:
            # The previous segment runs past this one's start — an overlap,
            # not a gap; the old message mislabeled it.  Clamp the start so
            # subtitles/voiceover segments don't collide.
            logger.warning(f"Found overlap between segments at {i}. Adjusting timestamps.")
            translated_json[i]["start"] = prev_end  # You can adjust this to smooth the transition

    return translated_json
344 |
def upload_and_manage(file, target_language, mode="transcription"):
|
345 |
if file is None:
|
346 |
logger.info("No file uploaded. Please upload a video/audio file.")
|
|
|
370 |
|
371 |
# Step 3: Add transcript to video based on timestamps
|
372 |
logger.info("Adding translated transcript to video...")
|
373 |
+
add_transcript_voiceover(video_path, translated_json, output_path, mode == "Transcription with Voiceover", target_language)
|
374 |
logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
|
375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
# Convert translated JSON into a format for the editable table
|
377 |
logger.info("Converting translated JSON into editable table format...")
|
378 |
editable_table = [
|