Spaces:

Curify
/

studio_V1

Sleeping

qqwjq1981 committed on Mar 31

Commit

42c9b0e

verified ·

1 Parent(s): f852623

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -145,7 +145,7 @@ def transcribe_video_with_speakers(video_path):
         logger.info("WhisperX model loaded")
         # Transcribe
-        result = model.transcribe(audio_path, chunk_size=5, print_progress = True)
         logger.info("Audio transcription completed")
         # Get the detected language
@@ -474,12 +474,12 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
             logger.error(msg)
             return None, msg, msg
-        # Truncate text based on max token assumption (~60 tokens)
-        MAX_TTS_TOKENS = 60
-        tokens = full_text.split()  # crude token count
-        if len(tokens) > MAX_TTS_TOKENS:
-            logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
-            full_text = " ".join(tokens[:MAX_TTS_TOKENS])
         speed_tts = calculate_speed(full_text, desired_duration)
         tts.tts_to_file(
@@ -487,7 +487,8 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
             speaker_wav=speaker_wav_path,
             language=target_language,
             file_path=output_audio_path,
-            speed=speed_tts
         )
         if not os.path.exists(output_audio_path):

         logger.info("WhisperX model loaded")
         # Transcribe
+        result = model.transcribe(audio_path, chunk_size=6, print_progress = True)
         logger.info("Audio transcription completed")
         # Get the detected language
             logger.error(msg)
             return None, msg, msg
+        # # Truncate text based on max token assumption (~60 tokens)
+        # MAX_TTS_TOKENS = 60
+        # tokens = full_text.split()  # crude token count
+        # if len(tokens) > MAX_TTS_TOKENS:
+        #     logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
+        #     full_text = " ".join(tokens[:MAX_TTS_TOKENS])
         speed_tts = calculate_speed(full_text, desired_duration)
         tts.tts_to_file(
             speaker_wav=speaker_wav_path,
             language=target_language,
             file_path=output_audio_path,
+            speed=speed_tts,
+            split_sentences=True
         )
         if not os.path.exists(output_audio_path):