Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import moviepy
|
|
8 |
from transformers import pipeline
|
9 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
10 |
from moviepy.editor import (
|
|
|
11 |
VideoFileClip,
|
12 |
TextClip,
|
13 |
CompositeVideoClip,
|
@@ -16,6 +17,7 @@ from moviepy.editor import (
|
|
16 |
concatenate_videoclips,
|
17 |
concatenate_audioclips
|
18 |
)
|
|
|
19 |
from moviepy.audio.AudioClip import AudioArrayClip
|
20 |
import subprocess
|
21 |
import speech_recognition as sr
|
@@ -306,47 +308,83 @@ def update_translations(file, edited_table, mode):
|
|
306 |
except Exception as e:
|
307 |
raise ValueError(f"Error updating translations: {e}")
|
308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
|
310 |
logger.debug(f"Processing entry {i}: {entry}")
|
311 |
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
stroke_width=2,
|
319 |
-
fontsize=int(video_height // 20),
|
320 |
-
method='label',
|
321 |
-
).with_start(entry["start"]).with_duration(entry["end"] - entry["start"]).with_position(('bottom')).with_opacity(0.8)
|
322 |
|
323 |
audio_segment = None
|
324 |
if add_voiceover:
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
audio_clip = AudioFileClip(segment_audio_path)
|
332 |
-
# Get and log all methods in AudioFileClip
|
333 |
-
logger.info("Methods in AudioFileClip:")
|
334 |
-
for method in dir(audio_clip):
|
335 |
-
logger.info(method)
|
336 |
-
|
337 |
-
# Log duration of the audio clip and the desired duration for debugging.
|
338 |
-
logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
|
339 |
|
340 |
-
|
341 |
-
# Pad with silence if audio is too short
|
342 |
-
silence_duration = desired_duration - audio_clip.duration
|
343 |
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
return i, txt_clip, audio_segment
|
352 |
|
|
|
8 |
from transformers import pipeline
|
9 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
10 |
from moviepy.editor import (
|
11 |
+
ImageClip,
|
12 |
VideoFileClip,
|
13 |
TextClip,
|
14 |
CompositeVideoClip,
|
|
|
17 |
concatenate_videoclips,
|
18 |
concatenate_audioclips
|
19 |
)
|
20 |
+
from PIL import Image, ImageDraw, ImageFont
|
21 |
from moviepy.audio.AudioClip import AudioArrayClip
|
22 |
import subprocess
|
23 |
import speech_recognition as sr
|
|
|
308 |
except Exception as e:
|
309 |
raise ValueError(f"Error updating translations: {e}")
|
310 |
|
311 |
+
def _measure_text_width(draw, text, font):
    """Return the rendered pixel width of *text*, working across Pillow versions."""
    try:
        return draw.textlength(text, font=font)
    except AttributeError:
        # Older Pillow: no textlength() on the draw object (or no getlength()
        # on the font); fall back to the legacy textsize() that newer
        # releases have removed.
        width, _ = draw.textsize(text, font=font)
        return width


def _wrap_subtitle_text(text, max_width, measure):
    """Greedily wrap *text* into lines whose measure() does not exceed *max_width*."""
    lines = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}".strip()
        if measure(candidate) <= max_width:
            current = candidate
            continue

        # The word does not fit on the current line: flush what we have,
        # but never emit an empty line (happens when the first word is too wide).
        if current:
            lines.append(current)
        if measure(word) <= max_width:
            current = word
            continue

        # A single token wider than the line (e.g. text written without
        # spaces) is broken per character so it is not clipped at the edge.
        current = ""
        for char in word:
            if current and measure(current + char) > max_width:
                lines.append(current)
                current = char
            else:
                current += char

    # Keep one (possibly empty) line so the caller always renders a valid image.
    if current or not lines:
        lines.append(current)
    return lines


def create_subtitle_clip_pil(entry, video_width, video_height, font_path="./NotoSansSC-Regular.ttf"):
    """
    Creates a PIL-based ImageClip for subtitle text (no ImageMagick needed).

    The text in ``entry["translated"]`` is wrapped to 80% of the video width,
    drawn in opaque yellow on a transparent RGBA image, and wrapped in an
    ImageClip positioned at the bottom centre with 0.8 opacity.

    Args:
        entry: Mapping providing "translated" (subtitle text) plus "start" and
            "end" (clip timing; presumably seconds - confirm against caller).
        video_width: Width of the target video in pixels.
        video_height: Height of the target video in pixels; the font size is
            derived from it (height // 20).
        font_path: TrueType font file to render with. If it cannot be loaded,
            Pillow's built-in default font is used instead.

    Returns:
        The configured ImageClip (start and duration taken from *entry*).

    Raises:
        KeyError: If *entry* lacks "translated", "start" or "end".
    """
    subtitle_font_size = int(video_height // 20)
    subtitle_width = int(video_width * 0.8)
    text = entry["translated"]

    try:
        font = ImageFont.truetype(font_path, subtitle_font_size)
    except Exception as e:
        # Deliberately broad: a missing/unsupported font must not prevent the
        # subtitle from being rendered at all.
        print(f"⚠️ Could not load font from {font_path}, using default font: {e}")
        font = ImageFont.load_default()

    # A throwaway 1px-high canvas is enough to measure text widths.
    scratch_draw = ImageDraw.Draw(Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0)))
    lines = _wrap_subtitle_text(
        text,
        subtitle_width - 10,  # leave 5px horizontal padding on each side
        lambda candidate: _measure_text_width(scratch_draw, candidate, font),
    )

    line_height = subtitle_font_size + 4
    total_height = len(lines) * line_height + 10
    img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    for idx, line_text in enumerate(lines):
        draw.text((5, 5 + idx * line_height), line_text, font=font, fill=(255, 255, 0, 255))

    np_img = np.array(img)

    txt_clip = (
        ImageClip(np_img, ismask=False)
        .set_position(("center", "bottom"))
        .set_start(entry["start"])
        .set_duration(entry["end"] - entry["start"])
        .set_opacity(0.8)
    )

    return txt_clip
|
354 |
+
|
355 |
def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
    """
    Build the subtitle clip and, optionally, the voice-over audio for one entry.

    Both steps are best-effort: a failure in either is logged and the
    corresponding result is returned as None instead of raising.

    Args:
        entry: Mapping with "start" and "end"; "speaker" is optional and
            defaults to "default". It is passed on to the subtitle and
            voice-over helpers unchanged.
        i: Index of the entry; used in log messages, in the temporary audio
            file name, and echoed back in the result.
        video_width: Video width, forwarded to the subtitle renderer.
        video_height: Video height, forwarded to the subtitle renderer.
        add_voiceover: When falsy, no audio is generated.
        target_language: Forwarded to generate_voiceover_clone.
        speaker_sample_paths: Optional mapping from speaker id to a reference
            sample path; when empty or None no reference sample is passed.

    Returns:
        Tuple ``(i, txt_clip, audio_segment)`` where either clip may be None.
    """
    logger.debug(f"Processing entry {i}: {entry}")

    # Subtitle: rendered with PIL so ImageMagick is not required.
    txt_clip = None
    try:
        txt_clip = create_subtitle_clip_pil(entry, video_width, video_height)
    except Exception as e:
        logger.error(f"❌ Failed to create subtitle clip for entry {i}: {e}")

    if not add_voiceover:
        return i, txt_clip, None

    try:
        wav_path = f"segment_{i}_voiceover.wav"
        target_len = entry["end"] - entry["start"]
        speaker_key = entry.get("speaker", "default")
        reference_wav = None
        if speaker_sample_paths:
            reference_wav = speaker_sample_paths.get(speaker_key, None)

        generate_voiceover_clone([entry], target_len, target_language, reference_wav, wav_path)

        voice = AudioFileClip(wav_path)
        logger.debug(f"Audio clip duration: {voice.duration}, Desired duration: {target_len}")

        # Pad short audio with trailing silence to fill the subtitle's time slot.
        if voice.duration < target_len:
            gap = target_len - voice.duration
            voice = concatenate_audioclips([voice, silence(duration=gap)])
            logger.info(f"Padded audio with {gap:.2f}s silence.")

        return i, txt_clip, voice.set_start(entry["start"]).set_duration(target_len)

    except Exception as e:
        logger.error(f"❌ Failed to generate audio segment for entry {i}: {e}")
        return i, txt_clip, None
|
390 |
|