Update app.py
app.py CHANGED
@@ -308,59 +308,51 @@ def update_translations(file, edited_table, mode):
     except Exception as e:
         raise ValueError(f"Error updating translations: {e}")
 
-def create_subtitle_clip_pil(entry, video_width, video_height, font_path="./NotoSansSC-Regular.ttf"):
-    """
-    Creates a PIL-based ImageClip for subtitle text (no ImageMagick needed).
-    """
-    subtitle_font_size = int(video_height // 20)
-    subtitle_width = int(video_width * 0.8)
-    text = entry["translated"]
 
+def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_height, font_path):
     try:
+        subtitle_width = int(video_width * 0.8)
+        subtitle_font_size = int(video_height // 20)
         font = ImageFont.truetype(font_path, subtitle_font_size)
+
+        dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
+        draw = ImageDraw.Draw(dummy_img)
+
+        lines = []
+        line = ""
+        for word in text.split():
+            test_line = f"{line} {word}".strip()
+            bbox = draw.textbbox((0, 0), test_line, font=font)
+            w = bbox[2] - bbox[0]
+            if w <= subtitle_width - 10:
+                line = test_line
+            else:
+                lines.append(line)
+                line = word
+        lines.append(line)
+
+        line_heights = [draw.textbbox((0, 0), l, font=font)[3] - draw.textbbox((0, 0), l, font=font)[1] for l in lines]
+        total_height = sum(line_heights) + (len(lines) - 1) * 5
+        img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
+        draw = ImageDraw.Draw(img)
+
+        y = 0
+        for idx, line in enumerate(lines):
+            bbox = draw.textbbox((0, 0), line, font=font)
+            w = bbox[2] - bbox[0]
+            draw.text(((subtitle_width - w) // 2, y), line, font=font, fill="yellow")
+            y += line_heights[idx] + 5
+
+        txt_clip = ImageClip(img).set_start(start_time).set_duration(end_time - start_time).set_position("bottom").set_opacity(0.8)
+        return txt_clip
     except Exception as e:
-
-
-
-
-    dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
-    draw = ImageDraw.Draw(dummy_img)
-    lines = []
-    line = ""
-    for word in text.split():
-        test_line = f"{line} {word}".strip()
-        w, _ = draw.textsize(test_line, font=font)
-        if w <= subtitle_width - 10:
-            line = test_line
-        else:
-            lines.append(line)
-            line = word
-    lines.append(line)
-
-    line_height = subtitle_font_size + 4
-    total_height = len(lines) * line_height + 10
-    img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
-    draw = ImageDraw.Draw(img)
-
-    for idx, l in enumerate(lines):
-        draw.text((5, 5 + idx * line_height), l, font=font, fill=(255, 255, 0, 255))
-
-    np_img = np.array(img)
-
-    txt_clip = ImageClip(np_img, ismask=False).set_position(("center", "bottom")) \
-        .set_start(entry["start"]).set_duration(entry["end"] - entry["start"]).set_opacity(0.8)
-
-    return txt_clip
-
-def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
+        logger.error(f"❌ Failed to create subtitle clip: {e}")
+        return None
+
+def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, font_path, speaker_sample_paths=None):
     logger.debug(f"Processing entry {i}: {entry}")
 
-    try:
-        # Subtitle clip via PIL (robust, no ImageMagick needed)
-        txt_clip = create_subtitle_clip_pil(entry, video_width, video_height)
-    except Exception as e:
-        logger.error(f"❌ Failed to create subtitle clip for entry {i}: {e}")
-        txt_clip = None
+    txt_clip = create_subtitle_clip_pil(entry["translated"], entry["start"], entry["end"], video_width, video_height, font_path)
 
     audio_segment = None
     if add_voiceover:
@@ -368,22 +360,24 @@ def process_entry(entry, i, video_width, video_height, add_voiceover, target_lan
             segment_audio_path = f"segment_{i}_voiceover.wav"
             desired_duration = entry["end"] - entry["start"]
             speaker_id = entry.get("speaker", "default")
-            speaker_wav_path = speaker_sample_paths.get(speaker_id,
+            speaker_wav_path = speaker_sample_paths.get(speaker_id, "speaker_default.wav") if speaker_sample_paths else "speaker_default.wav"
 
             generate_voiceover_clone([entry], desired_duration, target_language, speaker_wav_path, segment_audio_path)
 
+            if not os.path.exists(segment_audio_path):
+                raise FileNotFoundError(f"Voiceover file not generated at: {segment_audio_path}")
+
             audio_clip = AudioFileClip(segment_audio_path)
             logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
 
             if audio_clip.duration < desired_duration:
                 silence_duration = desired_duration - audio_clip.duration
-                audio_clip = concatenate_audioclips([audio_clip,
-                logger.info(f"Padded audio with {silence_duration
+                audio_clip = concatenate_audioclips([audio_clip, AudioClip(lambda t: 0, duration=silence_duration)])
+                logger.info(f"Padded audio with {silence_duration} seconds of silence.")
 
             audio_segment = audio_clip.set_start(entry["start"]).set_duration(desired_duration)
-
         except Exception as e:
-            logger.error(f"
+            logger.error(f"❌ Failed to generate audio segment for entry {i}: {e}")
             audio_segment = None
 
     return i, txt_clip, audio_segment
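A note on the measurement change inside the wrapper: `ImageDraw.textsize()` was removed in Pillow 10, and `textbbox()` is the supported replacement, which is why the greedy word-wrap loop now computes widths from a bounding box. Below is a minimal, self-contained sketch of the same wrapping logic; it reuses the `./NotoSansSC-Regular.ttf` default from the removed signature and assumes that file is present, and the font size and sample text are illustrative only.

```python
# Greedy word wrap measured with textbbox(), mirroring the loop in the new
# create_subtitle_clip_pil. Pillow 10+ has no ImageDraw.textsize(); textbbox()
# returns (left, top, right, bottom), so width = right - left.
from PIL import Image, ImageDraw, ImageFont

font = ImageFont.truetype("./NotoSansSC-Regular.ttf", 40)  # assumes this font file exists
draw = ImageDraw.Draw(Image.new("RGBA", (1, 1)))           # scratch surface used only for measuring


def wrap(text: str, max_width: int) -> list[str]:
    """Pack words into lines whose rendered width stays within max_width pixels."""
    lines, line = [], ""
    for word in text.split():
        candidate = f"{line} {word}".strip()
        left, _, right, _ = draw.textbbox((0, 0), candidate, font=font)
        if right - left <= max_width:
            line = candidate
        else:
            lines.append(line)
            line = word
    lines.append(line)
    return lines


print(wrap("This is a long subtitle that needs to be wrapped", 300))
```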
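On handing the rendered image to MoviePy: with MoviePy 1.x (the `set_start`/`set_duration`/`set_position` chain in this diff is the 1.x API), `ImageClip` expects a filename or a NumPy array, so converting the PIL image with `np.array(...)`, as the removed code did, is the safer hand-off. A sketch with a stand-in image and hard-coded timings in place of the `entry` values:

```python
# Turn a rendered RGBA PIL image into a timed, positioned MoviePy clip.
# The np.array() conversion keeps ImageClip on its array code path; the alpha
# channel becomes the clip's mask, so transparency is preserved.
import numpy as np
from PIL import Image
from moviepy.editor import ImageClip

img = Image.new("RGBA", (640, 80), (0, 0, 0, 0))  # stand-in for the rendered subtitle image

txt_clip = (
    ImageClip(np.array(img))
    .set_start(1.0)                       # entry["start"]
    .set_duration(2.5)                    # entry["end"] - entry["start"]
    .set_position(("center", "bottom"))
    .set_opacity(0.8)
)
print(txt_clip.duration, txt_clip.size)
```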
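For the silence padding, one alternative to `AudioClip(lambda t: 0, duration=...)` is a zero-filled `AudioArrayClip` that copies the source clip's sample rate and channel count; a frame function returning a plain scalar advertises a single channel, which can be fragile once the padded clip is mixed with or exported as stereo audio. A sketch assuming MoviePy 1.x; the helper name `pad_to_duration` and the generated test tone are illustrative, not part of the repo.

```python
# Pad an audio clip to a target duration by appending explicit zero samples.
import numpy as np
from moviepy.audio.AudioClip import AudioArrayClip
from moviepy.editor import concatenate_audioclips


def pad_to_duration(audio_clip, desired_duration, default_fps=44100):
    """Append zero-filled silence so the clip lasts desired_duration seconds."""
    missing = desired_duration - audio_clip.duration
    if missing <= 0:
        return audio_clip
    fps = getattr(audio_clip, "fps", None) or default_fps
    nchannels = getattr(audio_clip, "nchannels", 2)
    silence = AudioArrayClip(np.zeros((int(missing * fps), nchannels)), fps=fps)
    return concatenate_audioclips([audio_clip, silence])


# Half-second stereo test tone standing in for AudioFileClip(f"segment_{i}_voiceover.wav"):
samples = np.sin(2 * np.pi * 440 * np.arange(22050) / 44100)
tone = AudioArrayClip(samples[:, None].repeat(2, axis=1), fps=44100)
padded = pad_to_duration(tone, desired_duration=2.0)
print(padded.duration)  # ≈ 2.0
```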