Update app.py
app.py CHANGED
@@ -308,59 +308,51 @@ def update_translations(file, edited_table, mode):
     except Exception as e:
         raise ValueError(f"Error updating translations: {e}")
 
-def create_subtitle_clip_pil(entry, video_width, video_height, font_path="./NotoSansSC-Regular.ttf"):
-    """
-    Creates a PIL-based ImageClip for subtitle text (no ImageMagick needed).
-    """
-    subtitle_font_size = int(video_height // 20)
-    subtitle_width = int(video_width * 0.8)
-    text = entry["translated"]
 
+def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_height, font_path):
     try:
+        subtitle_width = int(video_width * 0.8)
+        subtitle_font_size = int(video_height // 20)
         font = ImageFont.truetype(font_path, subtitle_font_size)
+
+        dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
+        draw = ImageDraw.Draw(dummy_img)
+
+        lines = []
+        line = ""
+        for word in text.split():
+            test_line = f"{line} {word}".strip()
+            bbox = draw.textbbox((0, 0), test_line, font=font)
+            w = bbox[2] - bbox[0]
+            if w <= subtitle_width - 10:
+                line = test_line
+            else:
+                lines.append(line)
+                line = word
+        lines.append(line)
+
+        line_heights = [draw.textbbox((0, 0), l, font=font)[3] - draw.textbbox((0, 0), l, font=font)[1] for l in lines]
+        total_height = sum(line_heights) + (len(lines) - 1) * 5
+        img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
+        draw = ImageDraw.Draw(img)
+
+        y = 0
+        for idx, line in enumerate(lines):
+            bbox = draw.textbbox((0, 0), line, font=font)
+            w = bbox[2] - bbox[0]
+            draw.text(((subtitle_width - w) // 2, y), line, font=font, fill="yellow")
+            y += line_heights[idx] + 5
+
+        txt_clip = ImageClip(img).set_start(start_time).set_duration(end_time - start_time).set_position("bottom").set_opacity(0.8)
+        return txt_clip
     except Exception as e:
-
-
-
-
-    dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
-    draw = ImageDraw.Draw(dummy_img)
-    lines = []
-    line = ""
-    for word in text.split():
-        test_line = f"{line} {word}".strip()
-        w, _ = draw.textsize(test_line, font=font)
-        if w <= subtitle_width - 10:
-            line = test_line
-        else:
-            lines.append(line)
-            line = word
-    lines.append(line)
-
-    line_height = subtitle_font_size + 4
-    total_height = len(lines) * line_height + 10
-    img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
-    draw = ImageDraw.Draw(img)
-
-    for idx, l in enumerate(lines):
-        draw.text((5, 5 + idx * line_height), l, font=font, fill=(255, 255, 0, 255))
-
-    np_img = np.array(img)
-
-    txt_clip = ImageClip(np_img, ismask=False).set_position(("center", "bottom")) \
-        .set_start(entry["start"]).set_duration(entry["end"] - entry["start"]).set_opacity(0.8)
-
-    return txt_clip
-
-def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
+        logger.error(f"❌ Failed to create subtitle clip: {e}")
+        return None
+
+def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, font_path, speaker_sample_paths=None):
     logger.debug(f"Processing entry {i}: {entry}")
 
-    try:
-        # Subtitle clip via PIL (robust, no ImageMagick needed)
-        txt_clip = create_subtitle_clip_pil(entry, video_width, video_height)
-    except Exception as e:
-        logger.error(f"❌ Failed to create subtitle clip for entry {i}: {e}")
-        txt_clip = None
+    txt_clip = create_subtitle_clip_pil(entry["translated"], entry["start"], entry["end"], video_width, video_height, font_path)
 
     audio_segment = None
     if add_voiceover:
@@ -368,22 +360,24 @@ def process_entry(entry, i, video_width, video_height, add_voiceover, target_lan
             segment_audio_path = f"segment_{i}_voiceover.wav"
             desired_duration = entry["end"] - entry["start"]
             speaker_id = entry.get("speaker", "default")
-            speaker_wav_path = speaker_sample_paths.get(speaker_id,
+            speaker_wav_path = speaker_sample_paths.get(speaker_id, "speaker_default.wav") if speaker_sample_paths else "speaker_default.wav"
 
             generate_voiceover_clone([entry], desired_duration, target_language, speaker_wav_path, segment_audio_path)
 
+            if not os.path.exists(segment_audio_path):
+                raise FileNotFoundError(f"Voiceover file not generated at: {segment_audio_path}")
+
             audio_clip = AudioFileClip(segment_audio_path)
             logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
 
             if audio_clip.duration < desired_duration:
                 silence_duration = desired_duration - audio_clip.duration
-                audio_clip = concatenate_audioclips([audio_clip,
-                logger.info(f"Padded audio with {silence_duration
+                audio_clip = concatenate_audioclips([audio_clip, AudioClip(lambda t: 0, duration=silence_duration)])
+                logger.info(f"Padded audio with {silence_duration} seconds of silence.")
 
             audio_segment = audio_clip.set_start(entry["start"]).set_duration(desired_duration)
-
         except Exception as e:
-            logger.error(f"
+            logger.error(f"❌ Failed to generate audio segment for entry {i}: {e}")
             audio_segment = None
 
     return i, txt_clip, audio_segment
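A note on the measurement change inside the wrapper: `ImageDraw.textsize()` was removed in Pillow 10, and `textbbox()` is the supported replacement, which is why the greedy word-wrap loop now computes widths from a bounding box. Below is a minimal, self-contained sketch of the same wrapping logic; it reuses the `./NotoSansSC-Regular.ttf` default from the removed signature and assumes that file is present, and the font size and sample text are illustrative only.

```python
# Greedy word wrap measured with textbbox(), mirroring the loop in the new
# create_subtitle_clip_pil. Pillow 10+ has no ImageDraw.textsize(); textbbox()
# returns (left, top, right, bottom), so width = right - left.
from PIL import Image, ImageDraw, ImageFont

font = ImageFont.truetype("./NotoSansSC-Regular.ttf", 40)  # assumes this font file exists
draw = ImageDraw.Draw(Image.new("RGBA", (1, 1)))           # scratch surface used only for measuring


def wrap(text: str, max_width: int) -> list[str]:
    """Pack words into lines whose rendered width stays within max_width pixels."""
    lines, line = [], ""
    for word in text.split():
        candidate = f"{line} {word}".strip()
        left, _, right, _ = draw.textbbox((0, 0), candidate, font=font)
        if right - left <= max_width:
            line = candidate
        else:
            lines.append(line)
            line = word
    lines.append(line)
    return lines


print(wrap("This is a long subtitle that needs to be wrapped", 300))
```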
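On handing the rendered image to MoviePy: with MoviePy 1.x (the `set_start`/`set_duration`/`set_position` chain in this diff is the 1.x API), `ImageClip` expects a filename or a NumPy array, so converting the PIL image with `np.array(...)`, as the removed code did, is the safer hand-off. A sketch with a stand-in image and hard-coded timings in place of the `entry` values:

```python
# Turn a rendered RGBA PIL image into a timed, positioned MoviePy clip.
# The np.array() conversion keeps ImageClip on its array code path; the alpha
# channel becomes the clip's mask, so transparency is preserved.
import numpy as np
from PIL import Image
from moviepy.editor import ImageClip

img = Image.new("RGBA", (640, 80), (0, 0, 0, 0))  # stand-in for the rendered subtitle image

txt_clip = (
    ImageClip(np.array(img))
    .set_start(1.0)                       # entry["start"]
    .set_duration(2.5)                    # entry["end"] - entry["start"]
    .set_position(("center", "bottom"))
    .set_opacity(0.8)
)
print(txt_clip.duration, txt_clip.size)
```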
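For the silence padding, one alternative to `AudioClip(lambda t: 0, duration=...)` is a zero-filled `AudioArrayClip` that copies the source clip's sample rate and channel count; a frame function returning a plain scalar advertises a single channel, which can be fragile once the padded clip is mixed with or exported as stereo audio. A sketch assuming MoviePy 1.x; the helper name `pad_to_duration` and the generated test tone are illustrative, not part of the repo.

```python
# Pad an audio clip to a target duration by appending explicit zero samples.
import numpy as np
from moviepy.audio.AudioClip import AudioArrayClip
from moviepy.editor import concatenate_audioclips


def pad_to_duration(audio_clip, desired_duration, default_fps=44100):
    """Append zero-filled silence so the clip lasts desired_duration seconds."""
    missing = desired_duration - audio_clip.duration
    if missing <= 0:
        return audio_clip
    fps = getattr(audio_clip, "fps", None) or default_fps
    nchannels = getattr(audio_clip, "nchannels", 2)
    silence = AudioArrayClip(np.zeros((int(missing * fps), nchannels)), fps=fps)
    return concatenate_audioclips([audio_clip, silence])


# Half-second stereo test tone standing in for AudioFileClip(f"segment_{i}_voiceover.wav"):
samples = np.sin(2 * np.pi * 440 * np.arange(22050) / 44100)
tone = AudioArrayClip(samples[:, None].repeat(2, axis=1), fps=44100)
padded = pad_to_duration(tone, desired_duration=2.0)
print(padded.duration)  # ≈ 2.0
```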