Update app.py
Browse files
app.py
CHANGED
@@ -481,7 +481,7 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
481 |
# logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
|
482 |
# full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
483 |
|
484 |
-
speed_tts =
|
485 |
tts.tts_to_file(
|
486 |
text=full_text,
|
487 |
speaker_wav=speaker_wav_path,
|
@@ -506,25 +506,27 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
506 |
logger.error(traceback.format_exc())
|
507 |
return None, err_msg, err_msg
|
508 |
|
509 |
-
def
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
514 |
else:
|
515 |
-
|
516 |
-
|
517 |
-
return
|
518 |
-
|
519 |
-
def calculate_speed(text, desired_duration):
|
520 |
-
# Calculate characters per second
|
521 |
-
char_count = len(text)
|
522 |
-
chars_per_second = char_count / (desired_duration + 0.001)
|
523 |
-
|
524 |
-
# Apply truncated linear function to get speed
|
525 |
-
speed = truncated_linear(chars_per_second)
|
526 |
-
|
527 |
-
return speed
|
528 |
|
529 |
def upload_and_manage(file, target_language, mode="transcription"):
|
530 |
if file is None:
|
|
|
481 |
# logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
|
482 |
# full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
483 |
|
484 |
+
speed_tts = calibrated_speed(full_text, desired_duration)
|
485 |
tts.tts_to_file(
|
486 |
text=full_text,
|
487 |
speaker_wav=speaker_wav_path,
|
|
|
506 |
logger.error(traceback.format_exc())
|
507 |
return None, err_msg, err_msg
|
508 |
|
509 |
+
def calibrated_speed(text, desired_duration):
|
510 |
+
"""
|
511 |
+
Compute a speed factor to help TTS fit audio into desired duration,
|
512 |
+
using a simple truncated linear function of characters per second.
|
513 |
+
"""
|
514 |
+
char_count = len(text.strip())
|
515 |
+
if char_count == 0 or desired_duration <= 0:
|
516 |
+
return 1.0 # fallback
|
517 |
+
|
518 |
+
cps = char_count / desired_duration # characters per second
|
519 |
+
|
520 |
+
# Truncated linear mapping
|
521 |
+
if cps < 10:
|
522 |
+
return 1.0
|
523 |
+
elif cps > 25:
|
524 |
+
return 1.4
|
525 |
else:
|
526 |
+
# Linearly scale between cps 10 -> 25 and speed 1.0 -> 1.4
|
527 |
+
slope = (1.4 - 1.0) / (25 - 10)
|
528 |
+
return 1.0 + slope * (cps - 10)
|
529 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
530 |
|
531 |
def upload_and_manage(file, target_language, mode="transcription"):
|
532 |
if file is None:
|