qqwjq1981 committed on
Commit
76d739a
·
verified ·
1 Parent(s): 454faf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -52
app.py CHANGED
@@ -308,59 +308,51 @@ def update_translations(file, edited_table, mode):
308
  except Exception as e:
309
  raise ValueError(f"Error updating translations: {e}")
310
 
311
- def create_subtitle_clip_pil(entry, video_width, video_height, font_path="./NotoSansSC-Regular.ttf"):
312
- """
313
- Creates a PIL-based ImageClip for subtitle text (no ImageMagick needed).
314
- """
315
- subtitle_font_size = int(video_height // 20)
316
- subtitle_width = int(video_width * 0.8)
317
- text = entry["translated"]
318
 
 
319
  try:
 
 
320
  font = ImageFont.truetype(font_path, subtitle_font_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  except Exception as e:
322
- print(f"⚠️ Could not load font from {font_path}, using default font: {e}")
323
- font = ImageFont.load_default()
324
-
325
- # Estimate text height using multiline
326
- dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
327
- draw = ImageDraw.Draw(dummy_img)
328
- lines = []
329
- line = ""
330
- for word in text.split():
331
- test_line = f"{line} {word}".strip()
332
- w, _ = draw.textsize(test_line, font=font)
333
- if w <= subtitle_width - 10:
334
- line = test_line
335
- else:
336
- lines.append(line)
337
- line = word
338
- lines.append(line)
339
-
340
- line_height = subtitle_font_size + 4
341
- total_height = len(lines) * line_height + 10
342
- img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
343
- draw = ImageDraw.Draw(img)
344
-
345
- for idx, l in enumerate(lines):
346
- draw.text((5, 5 + idx * line_height), l, font=font, fill=(255, 255, 0, 255))
347
-
348
- np_img = np.array(img)
349
-
350
- txt_clip = ImageClip(np_img, ismask=False).set_position(("center", "bottom")) \
351
- .set_start(entry["start"]).set_duration(entry["end"] - entry["start"]).set_opacity(0.8)
352
-
353
- return txt_clip
354
-
355
- def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
356
  logger.debug(f"Processing entry {i}: {entry}")
357
 
358
- try:
359
- # Subtitle clip via PIL (robust, no ImageMagick needed)
360
- txt_clip = create_subtitle_clip_pil(entry, video_width, video_height)
361
- except Exception as e:
362
- logger.error(f"❌ Failed to create subtitle clip for entry {i}: {e}")
363
- txt_clip = None
364
 
365
  audio_segment = None
366
  if add_voiceover:
@@ -368,22 +360,24 @@ def process_entry(entry, i, video_width, video_height, add_voiceover, target_lan
368
  segment_audio_path = f"segment_{i}_voiceover.wav"
369
  desired_duration = entry["end"] - entry["start"]
370
  speaker_id = entry.get("speaker", "default")
371
- speaker_wav_path = speaker_sample_paths.get(speaker_id, None) if speaker_sample_paths else None
372
 
373
  generate_voiceover_clone([entry], desired_duration, target_language, speaker_wav_path, segment_audio_path)
374
 
 
 
 
375
  audio_clip = AudioFileClip(segment_audio_path)
376
  logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
377
 
378
  if audio_clip.duration < desired_duration:
379
  silence_duration = desired_duration - audio_clip.duration
380
- audio_clip = concatenate_audioclips([audio_clip, silence(duration=silence_duration)])
381
- logger.info(f"Padded audio with {silence_duration:.2f}s silence.")
382
 
383
  audio_segment = audio_clip.set_start(entry["start"]).set_duration(desired_duration)
384
-
385
  except Exception as e:
386
- logger.error(f" Failed to generate audio segment for entry {i}: {e}")
387
  audio_segment = None
388
 
389
  return i, txt_clip, audio_segment
 
308
  except Exception as e:
309
  raise ValueError(f"Error updating translations: {e}")
310
 
 
 
 
 
 
 
 
311
 
312
+ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_height, font_path):
313
  try:
314
+ subtitle_width = int(video_width * 0.8)
315
+ subtitle_font_size = int(video_height // 20)
316
  font = ImageFont.truetype(font_path, subtitle_font_size)
317
+
318
+ dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
319
+ draw = ImageDraw.Draw(dummy_img)
320
+
321
+ lines = []
322
+ line = ""
323
+ for word in text.split():
324
+ test_line = f"{line} {word}".strip()
325
+ bbox = draw.textbbox((0, 0), test_line, font=font)
326
+ w = bbox[2] - bbox[0]
327
+ if w <= subtitle_width - 10:
328
+ line = test_line
329
+ else:
330
+ lines.append(line)
331
+ line = word
332
+ lines.append(line)
333
+
334
+ line_heights = [draw.textbbox((0, 0), l, font=font)[3] - draw.textbbox((0, 0), l, font=font)[1] for l in lines]
335
+ total_height = sum(line_heights) + (len(lines) - 1) * 5
336
+ img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
337
+ draw = ImageDraw.Draw(img)
338
+
339
+ y = 0
340
+ for idx, line in enumerate(lines):
341
+ bbox = draw.textbbox((0, 0), line, font=font)
342
+ w = bbox[2] - bbox[0]
343
+ draw.text(((subtitle_width - w) // 2, y), line, font=font, fill="yellow")
344
+ y += line_heights[idx] + 5
345
+
346
+ txt_clip = ImageClip(img).set_start(start_time).set_duration(end_time - start_time).set_position("bottom").set_opacity(0.8)
347
+ return txt_clip
348
  except Exception as e:
349
+ logger.error(f"\u274c Failed to create subtitle clip: {e}")
350
+ return None
351
+
352
+ def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, font_path, speaker_sample_paths=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  logger.debug(f"Processing entry {i}: {entry}")
354
 
355
+ txt_clip = create_subtitle_clip_pil(entry["translated"], entry["start"], entry["end"], video_width, video_height, font_path)
 
 
 
 
 
356
 
357
  audio_segment = None
358
  if add_voiceover:
 
360
  segment_audio_path = f"segment_{i}_voiceover.wav"
361
  desired_duration = entry["end"] - entry["start"]
362
  speaker_id = entry.get("speaker", "default")
363
+ speaker_wav_path = speaker_sample_paths.get(speaker_id, "speaker_default.wav") if speaker_sample_paths else "speaker_default.wav"
364
 
365
  generate_voiceover_clone([entry], desired_duration, target_language, speaker_wav_path, segment_audio_path)
366
 
367
+ if not os.path.exists(segment_audio_path):
368
+ raise FileNotFoundError(f"Voiceover file not generated at: {segment_audio_path}")
369
+
370
  audio_clip = AudioFileClip(segment_audio_path)
371
  logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
372
 
373
  if audio_clip.duration < desired_duration:
374
  silence_duration = desired_duration - audio_clip.duration
375
+ audio_clip = concatenate_audioclips([audio_clip, AudioClip(lambda t: 0, duration=silence_duration)])
376
+ logger.info(f"Padded audio with {silence_duration} seconds of silence.")
377
 
378
  audio_segment = audio_clip.set_start(entry["start"]).set_duration(desired_duration)
 
379
  except Exception as e:
380
+ logger.error(f"\u274c Failed to generate audio segment for entry {i}: {e}")
381
  audio_segment = None
382
 
383
  return i, txt_clip, audio_segment