Update app.py
Browse files
app.py
CHANGED
|
@@ -145,7 +145,7 @@ def transcribe_video_with_speakers(video_path):
|
|
| 145 |
logger.info("WhisperX model loaded")
|
| 146 |
|
| 147 |
# Transcribe
|
| 148 |
-
result = model.transcribe(audio_path, chunk_size=
|
| 149 |
logger.info("Audio transcription completed")
|
| 150 |
|
| 151 |
# Get the detected language
|
|
@@ -474,12 +474,12 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
| 474 |
logger.error(msg)
|
| 475 |
return None, msg, msg
|
| 476 |
|
| 477 |
-
# Truncate text based on max token assumption (~60 tokens)
|
| 478 |
-
MAX_TTS_TOKENS = 60
|
| 479 |
-
tokens = full_text.split() # crude token count
|
| 480 |
-
if len(tokens) > MAX_TTS_TOKENS:
|
| 481 |
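-
logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")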
|
| 482 |
-
full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
| 483 |
|
| 484 |
speed_tts = calculate_speed(full_text, desired_duration)
|
| 485 |
tts.tts_to_file(
|
|
@@ -487,7 +487,8 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
| 487 |
speaker_wav=speaker_wav_path,
|
| 488 |
language=target_language,
|
| 489 |
file_path=output_audio_path,
|
| 490 |
-
speed=speed_tts
|
|
|
|
| 491 |
)
|
| 492 |
|
| 493 |
if not os.path.exists(output_audio_path):
|
|
|
|
| 145 |
logger.info("WhisperX model loaded")
|
| 146 |
|
| 147 |
# Transcribe
|
| 148 |
+
result = model.transcribe(audio_path, chunk_size=6, print_progress = True)
|
| 149 |
logger.info("Audio transcription completed")
|
| 150 |
|
| 151 |
# Get the detected language
|
|
|
|
| 474 |
logger.error(msg)
|
| 475 |
return None, msg, msg
|
| 476 |
|
| 477 |
+
# # Truncate text based on max token assumption (~60 tokens)
|
| 478 |
+
# MAX_TTS_TOKENS = 60
|
| 479 |
+
# tokens = full_text.split() # crude token count
|
| 480 |
+
# if len(tokens) > MAX_TTS_TOKENS:
|
| 481 |
+
# logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
|
| 482 |
+
# full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
| 483 |
|
| 484 |
speed_tts = calculate_speed(full_text, desired_duration)
|
| 485 |
tts.tts_to_file(
|
|
|
|
| 487 |
speaker_wav=speaker_wav_path,
|
| 488 |
language=target_language,
|
| 489 |
file_path=output_audio_path,
|
| 490 |
+
speed=speed_tts,
|
| 491 |
+
split_sentences=True
|
| 492 |
)
|
| 493 |
|
| 494 |
if not os.path.exists(output_audio_path):
|