Update app.py
Browse files
app.py
CHANGED
@@ -430,35 +430,69 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
|
|
430 |
|
431 |
logger.info("Video processing completed successfully.")
|
432 |
|
433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
def generate_voiceover_clone(translated_json, desired_duration, target_language, speaker_wav_path, output_audio_path):
|
435 |
try:
|
436 |
-
|
437 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
if not speaker_wav_path or not os.path.exists(speaker_wav_path):
|
439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
440 |
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
|
|
|
|
445 |
|
446 |
-
#
|
447 |
tts.tts_to_file(
|
448 |
text=full_text,
|
449 |
speaker_wav=speaker_wav_path,
|
450 |
-
language=
|
451 |
-
file_path=output_audio_path
|
452 |
-
|
|
|
453 |
)
|
454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
return output_audio_path, "β
Voice cloning completed successfully."
|
456 |
|
457 |
except Exception as e:
|
458 |
-
|
459 |
-
traceback.
|
460 |
-
|
461 |
-
|
462 |
|
463 |
def truncated_linear(x):
|
464 |
if x < 15:
|
|
|
430 |
|
431 |
logger.info("Video processing completed successfully.")
|
432 |
|
433 |
+
import os
|
434 |
+
import traceback
|
435 |
+
from TTS.api import TTS
|
436 |
+
import wave
|
437 |
+
import logging
|
438 |
+
|
439 |
+
logger = logging.getLogger(__name__)

# Load the XTTS v2 multilingual model a single time, when this module is
# imported, so generate_voiceover_clone() reuses the same instance per call.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
|
443 |
+
|
444 |
def generate_voiceover_clone(translated_json, desired_duration, target_language, speaker_wav_path, output_audio_path):
    """Synthesize the translated transcript in a cloned voice and write it to disk.

    The non-blank ``"translated"`` strings found in ``translated_json`` are
    joined with single spaces and handed to the module-level ``tts`` object
    together with the reference speaker recording.

    Args:
        translated_json: Iterable of dict entries. Only entries whose
            ``"translated"`` value is a non-blank string contribute text;
            anything else (missing key, ``None``, non-string, non-dict) is
            skipped. ``None`` is treated as an empty transcript.
        desired_duration: Target duration in seconds. It is only logged here;
            it does not influence synthesis.
        target_language: Passed as ``language`` to ``tts.tts_to_file``.
        speaker_wav_path: Path to the reference speaker recording; must exist.
        output_audio_path: Destination path for the generated audio.

    Returns:
        ``(output_audio_path, message)`` on success, or ``(None, message)``
        when there is no text, the speaker file is missing, no output file
        exists after synthesis, or any ``Exception`` is raised along the way.
    """
    try:
        # 1. Assemble the full text. A malformed segment is skipped rather
        #    than allowed to abort the whole voiceover.
        segments = []
        for entry in translated_json or []:
            text = entry.get("translated") if isinstance(entry, dict) else None
            if isinstance(text, str) and text.strip():
                segments.append(text)
        full_text = " ".join(segments)

        if not full_text:
            logger.error("❌ Translated text is empty. Skipping TTS generation.")
            return None, "❌ Translated text is empty."

        # 2. The reference recording must exist before the model is invoked.
        if not speaker_wav_path or not os.path.exists(speaker_wav_path):
            logger.error("❌ Speaker WAV path not found: %s", speaker_wav_path)
            return None, f"❌ Speaker audio not found: {speaker_wav_path}"

        # NOTE(review): the emoji on the three speaker/language log lines
        # (duration, path, language) could not be recovered unambiguously
        # from the mis-decoded source text - confirm the intended glyphs.

        # Best-effort diagnostic only: an unreadable or non-PCM file must not
        # stop synthesis, hence the deliberately broad handler.
        try:
            with wave.open(speaker_wav_path, 'rb') as wav_file:
                duration = wav_file.getnframes() / wav_file.getframerate()
            logger.info("🔊 Speaker WAV Duration: %.2fs", duration)
        except Exception as e:
            logger.warning("⚠️ Could not read speaker WAV duration: %s", e)

        # 3. Log key inputs. The duration is formatted defensively so that a
        #    None or non-numeric value cannot make a log line fail the call.
        try:
            duration_label = f"{float(desired_duration):.2f}s"
        except (TypeError, ValueError):
            duration_label = repr(desired_duration)

        logger.info("📥 Received Text: %s", full_text)
        logger.info("📁 Speaker WAV Path: %s", speaker_wav_path)
        logger.info("🌐 Target Language: %s", target_language)
        logger.info("💾 Output Path: %s", output_audio_path)
        logger.info("⏱️ Target Duration: %s", duration_label)

        # 4. Generate the audio. No speed adjustment is applied here.
        tts.tts_to_file(
            text=full_text,
            speaker_wav=speaker_wav_path,
            language=target_language,
            file_path=output_audio_path,
        )

        # 5. Confirm a file is present at the destination.
        if not os.path.exists(output_audio_path):
            logger.error("❌ File NOT generated after tts_to_file: %s", output_audio_path)
            return None, f"❌ Voiceover file not generated at: {output_audio_path}"

        logger.info("✅ Voice cloning completed successfully.")
        return output_audio_path, "✅ Voice cloning completed successfully."

    except Exception as e:
        # Boundary handler: callers receive a (None, message) tuple instead of
        # an exception; logger.exception records the full traceback.
        logger.exception("❌ Error during voice cloning:")
        return None, f"❌ An error occurred: {e}"
|
495 |
+
|
496 |
|
497 |
def truncated_linear(x):
|
498 |
if x < 15:
|