qqwjq1981 committed on
Commit
52d4080
·
verified ·
1 Parent(s): 42c9b0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -19
app.py CHANGED
@@ -481,7 +481,7 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
481
  # logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
482
  # full_text = " ".join(tokens[:MAX_TTS_TOKENS])
483
 
484
- speed_tts = calculate_speed(full_text, desired_duration)
485
  tts.tts_to_file(
486
  text=full_text,
487
  speaker_wav=speaker_wav_path,
@@ -506,25 +506,27 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
506
  logger.error(traceback.format_exc())
507
  return None, err_msg, err_msg
508
 
509
- def truncated_linear(x):
510
- if x < 15:
511
- return 1.01 # Ensure speed is strictly > 1
512
- elif x > 25:
513
- return 1.3
 
 
 
 
 
 
 
 
 
 
 
514
  else:
515
- slope = (1.3 - 1) / (25 - 15)
516
- speed = 1 + slope * (x - 15)
517
- return max(speed, 1.01) # Ensure lower bound is > 1
518
-
519
- def calculate_speed(text, desired_duration):
520
- # Calculate characters per second
521
- char_count = len(text)
522
- chars_per_second = char_count / (desired_duration + 0.001)
523
-
524
- # Apply truncated linear function to get speed
525
- speed = truncated_linear(chars_per_second)
526
-
527
- return speed
528
 
529
  def upload_and_manage(file, target_language, mode="transcription"):
530
  if file is None:
 
481
  # logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
482
  # full_text = " ".join(tokens[:MAX_TTS_TOKENS])
483
 
484
+ speed_tts = calibrated_speed(full_text, desired_duration)
485
  tts.tts_to_file(
486
  text=full_text,
487
  speaker_wav=speaker_wav_path,
 
506
  logger.error(traceback.format_exc())
507
  return None, err_msg, err_msg
508
 
509
+ def calibrated_speed(text, desired_duration):
510
+ """
511
+ Compute a speed factor to help TTS fit audio into desired duration,
512
+ using a simple truncated linear function of characters per second.
513
+ """
514
+ char_count = len(text.strip())
515
+ if char_count == 0 or desired_duration <= 0:
516
+ return 1.0 # fallback
517
+
518
+ cps = char_count / desired_duration # characters per second
519
+
520
+ # Truncated linear mapping
521
+ if cps < 10:
522
+ return 1.0
523
+ elif cps > 25:
524
+ return 1.4
525
  else:
526
+ # Linearly scale between cps 10 -> 25 and speed 1.0 -> 1.4
527
+ slope = (1.4 - 1.0) / (25 - 10)
528
+ return 1.0 + slope * (cps - 10)
529
+
 
 
 
 
 
 
 
 
 
530
 
531
  def upload_and_manage(file, target_language, mode="transcription"):
532
  if file is None: