qqwjq1981 committed on
Commit
e1b0b64
·
verified ·
1 Parent(s): 0eccd9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -33
app.py CHANGED
@@ -8,6 +8,7 @@ import moviepy
8
  from transformers import pipeline
9
  from transformers.pipelines.audio_utils import ffmpeg_read
10
  from moviepy.editor import (
 
11
  VideoFileClip,
12
  TextClip,
13
  CompositeVideoClip,
@@ -16,6 +17,7 @@ from moviepy.editor import (
16
  concatenate_videoclips,
17
  concatenate_audioclips
18
  )
 
19
  from moviepy.audio.AudioClip import AudioArrayClip
20
  import subprocess
21
  import speech_recognition as sr
@@ -306,47 +308,83 @@ def update_translations(file, edited_table, mode):
306
  except Exception as e:
307
  raise ValueError(f"Error updating translations: {e}")
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
310
  logger.debug(f"Processing entry {i}: {entry}")
311
 
312
- # Create text clip for subtitles
313
- txt_clip = TextClip(
314
- txt=entry["translated"],
315
- font="./NotoSansSC-Regular.ttf",
316
- color='yellow',
317
- stroke_color='black',
318
- stroke_width=2,
319
- fontsize=int(video_height // 20),
320
- method='label',
321
- ).with_start(entry["start"]).with_duration(entry["end"] - entry["start"]).with_position(('bottom')).with_opacity(0.8)
322
 
323
  audio_segment = None
324
  if add_voiceover:
325
- segment_audio_path = f"segment_{i}_voiceover.wav"
326
- desired_duration = entry["end"] - entry["start"]
327
- speaker_id = entry["speaker"] # Extract the speaker ID
328
- speaker_wav_path = f"speaker_{speaker_id}_sample.wav" # pass the intermediate value to prevent from breaking.
329
- generate_voiceover_clone([entry], desired_duration, target_language, speaker_wav_path, segment_audio_path)
330
-
331
- audio_clip = AudioFileClip(segment_audio_path)
332
- # Get and log all methods in AudioFileClip
333
- logger.info("Methods in AudioFileClip:")
334
- for method in dir(audio_clip):
335
- logger.info(method)
336
-
337
- # Log duration of the audio clip and the desired duration for debugging.
338
- logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
339
 
340
- if audio_clip.duration < desired_duration:
341
- # Pad with silence if audio is too short
342
- silence_duration = desired_duration - audio_clip.duration
343
 
344
- # Concatenate the original audio and silence
345
- audio_clip = concatenate_audioclips([audio_clip, silence(duration=silence_duration)])
346
- logger.info(f"Padded audio with {silence_duration} seconds of silence.")
347
-
348
- # Set the audio_segment to the required duration.
349
- audio_segment = audio_clip.with_start(entry["start"]).with_duration(desired_duration)
 
 
 
 
 
 
 
350
 
351
  return i, txt_clip, audio_segment
352
 
 
8
  from transformers import pipeline
9
  from transformers.pipelines.audio_utils import ffmpeg_read
10
  from moviepy.editor import (
11
+ ImageClip,
12
  VideoFileClip,
13
  TextClip,
14
  CompositeVideoClip,
 
17
  concatenate_videoclips,
18
  concatenate_audioclips
19
  )
20
+ from PIL import Image, ImageDraw, ImageFont
21
  from moviepy.audio.AudioClip import AudioArrayClip
22
  import subprocess
23
  import speech_recognition as sr
 
308
  except Exception as e:
309
  raise ValueError(f"Error updating translations: {e}")
310
 
311
def create_subtitle_clip_pil(entry, video_width, video_height, font_path="./NotoSansSC-Regular.ttf"):
    """
    Creates a PIL-based ImageClip for subtitle text (no ImageMagick needed).

    Args:
        entry: dict with "translated" (subtitle text) and "start"/"end"
            timestamps in seconds.
        video_width: width of the target video frame, in pixels.
        video_height: height of the target video frame, in pixels.
        font_path: path to a TTF font; falls back to PIL's default font
            if it cannot be loaded.

    Returns:
        An ImageClip of the rendered subtitle, bottom-centered, timed to
        the entry's start/end, at 0.8 opacity.
    """
    subtitle_font_size = int(video_height // 20)
    subtitle_width = int(video_width * 0.8)
    text = entry["translated"]

    try:
        font = ImageFont.truetype(font_path, subtitle_font_size)
    except Exception as e:
        print(f"⚠️ Could not load font from {font_path}, using default font: {e}")
        font = ImageFont.load_default()

    # Greedy word-wrap: measure candidate lines against the subtitle width.
    dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
    draw = ImageDraw.Draw(dummy_img)
    lines = []
    line = ""
    for word in text.split():
        test_line = f"{line} {word}".strip()
        # FIX: ImageDraw.textsize() was deprecated in Pillow 9.2 and removed
        # in Pillow 10; textbbox() is the supported way to measure text width.
        bbox = draw.textbbox((0, 0), test_line, font=font)
        w = bbox[2] - bbox[0]
        if w <= subtitle_width - 10:
            line = test_line
        else:
            lines.append(line)
            line = word
    lines.append(line)

    line_height = subtitle_font_size + 4
    total_height = len(lines) * line_height + 10
    img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    for idx, l in enumerate(lines):
        # Opaque yellow text on a fully transparent background.
        draw.text((5, 5 + idx * line_height), l, font=font, fill=(255, 255, 0, 255))

    np_img = np.array(img)

    # NOTE(review): ismask=/set_*() is the MoviePy 1.x API (consistent with the
    # file's `moviepy.editor` import); MoviePy 2.x renamed these to
    # is_mask=/with_*() — confirm the pinned moviepy version.
    txt_clip = ImageClip(np_img, ismask=False).set_position(("center", "bottom")) \
        .set_start(entry["start"]).set_duration(entry["end"] - entry["start"]).set_opacity(0.8)

    return txt_clip
355
def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
    """
    Build the subtitle clip — and, when requested, the voiceover audio
    segment — for a single translated entry.

    Args:
        entry: dict with "translated", "start", "end" and optionally "speaker".
        i: index of this entry (returned unchanged so callers can reorder).
        video_width, video_height: frame dimensions used to size subtitles.
        add_voiceover: when True, synthesize and time a voiceover segment.
        target_language: language code passed to the voiceover generator.
        speaker_sample_paths: optional mapping of speaker id -> reference
            sample path used for voice cloning.

    Returns:
        (i, txt_clip, audio_segment) — either clip may be None when its
        creation fails; failures are logged, never raised.
    """
    logger.debug(f"Processing entry {i}: {entry}")

    try:
        # Subtitle clip via PIL (robust, no ImageMagick needed)
        txt_clip = create_subtitle_clip_pil(entry, video_width, video_height)
    except Exception as e:
        logger.error(f"❌ Failed to create subtitle clip for entry {i}: {e}")
        txt_clip = None

    audio_segment = None
    if not add_voiceover:
        return i, txt_clip, audio_segment

    try:
        segment_audio_path = f"segment_{i}_voiceover.wav"
        desired_duration = entry["end"] - entry["start"]
        speaker_id = entry.get("speaker", "default")

        # Look up this speaker's reference sample, if any were provided.
        speaker_wav_path = None
        if speaker_sample_paths:
            speaker_wav_path = speaker_sample_paths.get(speaker_id)

        generate_voiceover_clone([entry], desired_duration, target_language, speaker_wav_path, segment_audio_path)

        audio_clip = AudioFileClip(segment_audio_path)
        logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")

        # Pad with silence when synthesis came up short of the slot length.
        if audio_clip.duration < desired_duration:
            silence_duration = desired_duration - audio_clip.duration
            audio_clip = concatenate_audioclips([audio_clip, silence(duration=silence_duration)])
            logger.info(f"Padded audio with {silence_duration:.2f}s silence.")

        audio_segment = audio_clip.set_start(entry["start"]).set_duration(desired_duration)

    except Exception as e:
        logger.error(f"❌ Failed to generate audio segment for entry {i}: {e}")
        audio_segment = None

    return i, txt_clip, audio_segment
390