qqwjq1981 committed on
Commit
42c9b0e
·
verified ·
1 Parent(s): f852623

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -145,7 +145,7 @@ def transcribe_video_with_speakers(video_path):
145
  logger.info("WhisperX model loaded")
146
 
147
  # Transcribe
148
- result = model.transcribe(audio_path, chunk_size=5, print_progress = True)
149
  logger.info("Audio transcription completed")
150
 
151
  # Get the detected language
@@ -474,12 +474,12 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
474
  logger.error(msg)
475
  return None, msg, msg
476
 
477
- # Truncate text based on max token assumption (~60 tokens)
478
- MAX_TTS_TOKENS = 60
479
- tokens = full_text.split() # crude token count
480
- if len(tokens) > MAX_TTS_TOKENS:
481
- logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
482
- full_text = " ".join(tokens[:MAX_TTS_TOKENS])
483
 
484
  speed_tts = calculate_speed(full_text, desired_duration)
485
  tts.tts_to_file(
@@ -487,7 +487,8 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
487
  speaker_wav=speaker_wav_path,
488
  language=target_language,
489
  file_path=output_audio_path,
490
- speed=speed_tts
 
491
  )
492
 
493
  if not os.path.exists(output_audio_path):
 
145
  logger.info("WhisperX model loaded")
146
 
147
  # Transcribe
148
+ result = model.transcribe(audio_path, chunk_size=6, print_progress = True)
149
  logger.info("Audio transcription completed")
150
 
151
  # Get the detected language
 
474
  logger.error(msg)
475
  return None, msg, msg
476
 
477
+ # # Truncate text based on max token assumption (~60 tokens)
478
+ # MAX_TTS_TOKENS = 60
479
+ # tokens = full_text.split() # crude token count
480
+ # if len(tokens) > MAX_TTS_TOKENS:
481
+ # logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
482
+ # full_text = " ".join(tokens[:MAX_TTS_TOKENS])
483
 
484
  speed_tts = calculate_speed(full_text, desired_duration)
485
  tts.tts_to_file(
 
487
  speaker_wav=speaker_wav_path,
488
  language=target_language,
489
  file_path=output_audio_path,
490
+ speed=speed_tts,
491
+ split_sentences=True
492
  )
493
 
494
  if not os.path.exists(output_audio_path):