Update app.py
Browse files
app.py
CHANGED
@@ -145,7 +145,7 @@ def transcribe_video_with_speakers(video_path):
|
|
145 |
logger.info("WhisperX model loaded")
|
146 |
|
147 |
# Transcribe
|
148 |
-
result = model.transcribe(audio_path, chunk_size=
|
149 |
logger.info("Audio transcription completed")
|
150 |
|
151 |
# Get the detected language
|
@@ -474,12 +474,12 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
474 |
logger.error(msg)
|
475 |
return None, msg, msg
|
476 |
|
477 |
-
# Truncate text based on max token assumption (~60 tokens)
|
478 |
-
MAX_TTS_TOKENS = 60
|
479 |
-
tokens = full_text.split() # crude token count
|
480 |
-
if len(tokens) > MAX_TTS_TOKENS:
|
481 |
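-
logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")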
|
482 |
-
full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
483 |
|
484 |
speed_tts = calculate_speed(full_text, desired_duration)
|
485 |
tts.tts_to_file(
|
@@ -487,7 +487,8 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
487 |
speaker_wav=speaker_wav_path,
|
488 |
language=target_language,
|
489 |
file_path=output_audio_path,
|
490 |
-
speed=speed_tts
|
|
|
491 |
)
|
492 |
|
493 |
if not os.path.exists(output_audio_path):
|
|
|
145 |
logger.info("WhisperX model loaded")
|
146 |
|
147 |
# Transcribe
|
148 |
+
result = model.transcribe(audio_path, chunk_size=6, print_progress = True)
|
149 |
logger.info("Audio transcription completed")
|
150 |
|
151 |
# Get the detected language
|
|
|
474 |
logger.error(msg)
|
475 |
return None, msg, msg
|
476 |
|
477 |
+
# # Truncate text based on max token assumption (~60 tokens)
|
478 |
+
# MAX_TTS_TOKENS = 60
|
479 |
+
# tokens = full_text.split() # crude token count
|
480 |
+
# if len(tokens) > MAX_TTS_TOKENS:
|
481 |
+
# logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
|
482 |
+
# full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
483 |
|
484 |
speed_tts = calculate_speed(full_text, desired_duration)
|
485 |
tts.tts_to_file(
|
|
|
487 |
speaker_wav=speaker_wav_path,
|
488 |
language=target_language,
|
489 |
file_path=output_audio_path,
|
490 |
+
speed=speed_tts,
|
491 |
+
split_sentences=True
|
492 |
)
|
493 |
|
494 |
if not os.path.exists(output_audio_path):
|