Spaces:

gnosticdev
/

Creador-de-videos-con-imagen

Running

App Files Files Community

gnosticdev committed on Mar 3

Commit

55cc4ac

verified ·

1 Parent(s): 2d35915

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -31

app.py CHANGED Viewed

@@ -18,11 +18,11 @@ async def text_to_speech(text, voice, rate, pitch):
         return None, "Please enter text to convert."
     if not voice:
         return None, "Please select a voice."
     voice_short_name = voice.split(" - ")[0]
     rate_str = f"{rate:+d}%"
     pitch_str = f"{pitch:+d}Hz"
     try:
         communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
@@ -34,39 +34,52 @@ async def text_to_speech(text, voice, rate, pitch):
     except Exception as e:
         return None, f"An error occurred: {str(e)}"
-# Función para agregar el fondo musical al speech
-def add_background_music(speech_file, background_music_file, output_file):
     speech = AudioSegment.from_mp3(speech_file)
     background_music = AudioSegment.from_mp3(background_music_file)
     background_music = background_music - 16  # Reducción aproximada para 15%
-    if len(background_music) < len(speech):
-        repetitions = math.ceil(len(speech) / len(background_music))
         background_music = background_music * repetitions
-    background_music = background_music[:len(speech)]
-    final_audio = speech.overlay(background_music)
     final_audio.export(output_file, format="mp3")
     print(f"Archivo generado exitosamente: {output_file}")
-# Función para procesar y combinar un solo video con audio
 def process_video_with_audio(audio_path, video_path):
     audio = AudioSegment.from_mp3(audio_path)
     audio_duration_ms = len(audio)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
         output_file = tmp_file.name
     try:
         if not video_path:
             return audio_path, None
         # Obtener duración del video
-        probe_command = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", video_path]
         video_duration = float(subprocess.check_output(probe_command).decode("utf-8").strip()) * 1000
-        # Calcular repeticiones
-        repetitions = math.ceil(audio_duration_ms / video_duration)
-        # Crear el comando FFmpeg para repetir el video y combinar con el audio
         command = [
             "ffmpeg",
             "-y",  # Sobrescribir el archivo de salida si existe
@@ -75,17 +88,18 @@ def process_video_with_audio(audio_path, video_path):
             "-i", audio_path,  # Input del audio
             "-c:v", "libx264",  # Códec de video
             "-c:a", "aac",  # Códec de audio
-            "-t", str(audio_duration_ms / 1000),  # Duración del video igual a la del audio
-            "-vf", f"fade=t=out:st={max(0, audio_duration_ms / 1000 - 3)}:d=3",  # Fade out
             "-map", "0:v",  # Mapear el stream de video
             "-map", "1:a",  # Mapear el stream de audio
             output_file  # Archivo de salida
         ]
         # Ejecutar el comando FFmpeg
         subprocess.run(command, check=True)
         return audio_path, output_file
     except subprocess.CalledProcessError as e:
         error_msg = f"Error processing video: {e.stderr.decode('utf-8', errors='ignore')}"
         print(error_msg)
@@ -96,32 +110,32 @@ async def tts_interface(text, voice, rate, pitch, background_music, video_file):
     speech_file, warning = await text_to_speech(text, voice, rate, pitch)
     if warning:
         return None, None, None, gr.Warning(warning)
     if background_music and background_music != "":
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
             output_audio = tmp_file.name
-            add_background_music(speech_file, background_music, output_audio)
         os.remove(speech_file)
         speech_file = output_audio
     # Procesar un solo video
     video_path = video_file.name if hasattr(video_file, 'name') else video_file if video_file else None
     audio_result, video_result = process_video_with_audio(speech_file, video_path)
     if video_result and audio_result != audio_result:
         os.remove(speech_file)
     return audio_result, video_result, None
 async def create_demo():
     voices = await get_voices()
     description = """
     Convert text to speech with optional audio background (15% volume) and video looping.
-    Upload a video - it will loop to match audio duration with a fade out 3 seconds before the end!
     Using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default.
     """
     demo = gr.Interface(
         fn=tts_interface,
         inputs=[
@@ -137,7 +151,7 @@ async def create_demo():
             gr.Video(label="Generated Video with Audio"),
             gr.Markdown(label="Warning", visible=False)
         ],
-        title="TextSpeech, BG Music and Vide Repeat for YT motivation estoico videos",
         description=description,
         article="Experience the power of Edge TTS with video integration!",
         analytics_enabled=False,

         return None, "Please enter text to convert."
     if not voice:
         return None, "Please select a voice."
     voice_short_name = voice.split(" - ")[0]
     rate_str = f"{rate:+d}%"
     pitch_str = f"{pitch:+d}Hz"
     try:
         communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
     except Exception as e:
         return None, f"An error occurred: {str(e)}"
+# Función para agregar el fondo musical al speech con separación clara entre speech y fade-out
+def add_background_music_with_fade_out(speech_file, background_music_file, output_file):
     speech = AudioSegment.from_mp3(speech_file)
     background_music = AudioSegment.from_mp3(background_music_file)
     background_music = background_music - 16  # Reducción aproximada para 15%
+    # Asegurarse de que el fondo musical sea más largo que el speech + 3 segundos de fade-out
+    if len(background_music) < len(speech) + 3000:  # 3000 ms = 3 segundos
+        repetitions = math.ceil((len(speech) + 3000) / len(background_music))
         background_music = background_music * repetitions
+    # Crear una pista combinada: speech seguido de 3 segundos de fade-out en el fondo musical
+    combined_audio = speech.overlay(background_music[:len(speech)])  # Speech con fondo musical
+    fade_out_segment = background_music[len(speech):len(speech) + 3000].fade_out(3000)  # Fade-out de 3 segundos
+    final_audio = combined_audio + fade_out_segment  # Speech + fade-out
+    # Exportar el audio final
     final_audio.export(output_file, format="mp3")
     print(f"Archivo generado exitosamente: {output_file}")
+# Función para procesar y combinar un solo video con audio, incluyendo fade-out después del speech
 def process_video_with_audio(audio_path, video_path):
     audio = AudioSegment.from_mp3(audio_path)
     audio_duration_ms = len(audio)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
         output_file = tmp_file.name
     try:
         if not video_path:
             return audio_path, None
         # Obtener duración del video
+        probe_command = [
+            "ffprobe",
+            "-v", "error",
+            "-show_entries", "format=duration",
+            "-of", "default=noprint_wrappers=1:nokey=1",
+            video_path
+        ]
         video_duration = float(subprocess.check_output(probe_command).decode("utf-8").strip()) * 1000
+        # Calcular repeticiones necesarias para el video
+        repetitions = math.ceil((audio_duration_ms + 3000) / video_duration)  # Añadir 3 segundos para el fade-out
+        # Crear el comando FFmpeg para repetir el video, combinar con el audio y aplicar fade-out
         command = [
             "ffmpeg",
             "-y",  # Sobrescribir el archivo de salida si existe
             "-i", audio_path,  # Input del audio
             "-c:v", "libx264",  # Códec de video
             "-c:a", "aac",  # Códec de audio
+            "-t", str((audio_duration_ms + 3000) / 1000),  # Duración total: speech + 3 segundos de fade-out
+            "-vf", f"fade=t=out:st={max(0, (audio_duration_ms + 3000) / 1000 - 3)}:d=3",  # Fade-out del video
+            "-af", "afade=t=out:st={}:d=3".format(max(0, (audio_duration_ms + 3000) / 1000 - 3)),  # Fade-out del audio
             "-map", "0:v",  # Mapear el stream de video
             "-map", "1:a",  # Mapear el stream de audio
             output_file  # Archivo de salida
         ]
         # Ejecutar el comando FFmpeg
         subprocess.run(command, check=True)
         return audio_path, output_file
     except subprocess.CalledProcessError as e:
         error_msg = f"Error processing video: {e.stderr.decode('utf-8', errors='ignore')}"
         print(error_msg)
     speech_file, warning = await text_to_speech(text, voice, rate, pitch)
     if warning:
         return None, None, None, gr.Warning(warning)
     if background_music and background_music != "":
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
             output_audio = tmp_file.name
+            add_background_music_with_fade_out(speech_file, background_music, output_audio)
         os.remove(speech_file)
         speech_file = output_audio
     # Procesar un solo video
     video_path = video_file.name if hasattr(video_file, 'name') else video_file if video_file else None
     audio_result, video_result = process_video_with_audio(speech_file, video_path)
     if video_result and audio_result != audio_result:
         os.remove(speech_file)
     return audio_result, video_result, None
 async def create_demo():
     voices = await get_voices()
     description = """
     Convert text to speech with optional audio background (15% volume) and video looping.
+    Upload a video - it will loop to match audio duration with a fade out 3 seconds after the speech ends!
     Using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default.
     """
     demo = gr.Interface(
         fn=tts_interface,
         inputs=[
             gr.Video(label="Generated Video with Audio"),
             gr.Markdown(label="Warning", visible=False)
         ],
+        title="TextSpeech, BG Music and Video Repeat for YT Motivation Videos",
         description=description,
         article="Experience the power of Edge TTS with video integration!",
         analytics_enabled=False,