Update app.py
Browse files
app.py
CHANGED
@@ -18,11 +18,11 @@ async def text_to_speech(text, voice, rate, pitch):
|
|
18 |
return None, "Please enter text to convert."
|
19 |
if not voice:
|
20 |
return None, "Please select a voice."
|
21 |
-
|
22 |
voice_short_name = voice.split(" - ")[0]
|
23 |
rate_str = f"{rate:+d}%"
|
24 |
pitch_str = f"{pitch:+d}Hz"
|
25 |
-
|
26 |
try:
|
27 |
communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
|
28 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
@@ -34,39 +34,52 @@ async def text_to_speech(text, voice, rate, pitch):
|
|
34 |
except Exception as e:
|
35 |
return None, f"An error occurred: {str(e)}"
|
36 |
|
37 |
-
# Función para agregar el fondo musical al speech
|
38 |
-
def
|
39 |
speech = AudioSegment.from_mp3(speech_file)
|
40 |
background_music = AudioSegment.from_mp3(background_music_file)
|
41 |
background_music = background_music - 16 # Reducción aproximada para 15%
|
42 |
-
|
43 |
-
|
|
|
|
|
44 |
background_music = background_music * repetitions
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
47 |
final_audio.export(output_file, format="mp3")
|
48 |
print(f"Archivo generado exitosamente: {output_file}")
|
49 |
|
50 |
-
# Función para procesar y combinar un solo video con audio
|
51 |
def process_video_with_audio(audio_path, video_path):
|
52 |
audio = AudioSegment.from_mp3(audio_path)
|
53 |
audio_duration_ms = len(audio)
|
54 |
-
|
55 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
|
56 |
output_file = tmp_file.name
|
57 |
-
|
58 |
try:
|
59 |
if not video_path:
|
60 |
return audio_path, None
|
61 |
-
|
62 |
# Obtener duración del video
|
63 |
-
probe_command = [
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
video_duration = float(subprocess.check_output(probe_command).decode("utf-8").strip()) * 1000
|
65 |
-
|
66 |
-
# Calcular repeticiones
|
67 |
-
repetitions = math.ceil(audio_duration_ms / video_duration)
|
68 |
-
|
69 |
-
# Crear el comando FFmpeg para repetir el video
|
70 |
command = [
|
71 |
"ffmpeg",
|
72 |
"-y", # Sobrescribir el archivo de salida si existe
|
@@ -75,17 +88,18 @@ def process_video_with_audio(audio_path, video_path):
|
|
75 |
"-i", audio_path, # Input del audio
|
76 |
"-c:v", "libx264", # Códec de video
|
77 |
"-c:a", "aac", # Códec de audio
|
78 |
-
"-t", str(audio_duration_ms / 1000), # Duración
|
79 |
-
"-vf", f"fade=t=out:st={max(0, audio_duration_ms / 1000 - 3)}:d=3", # Fade
|
|
|
80 |
"-map", "0:v", # Mapear el stream de video
|
81 |
"-map", "1:a", # Mapear el stream de audio
|
82 |
output_file # Archivo de salida
|
83 |
]
|
84 |
-
|
85 |
# Ejecutar el comando FFmpeg
|
86 |
subprocess.run(command, check=True)
|
87 |
return audio_path, output_file
|
88 |
-
|
89 |
except subprocess.CalledProcessError as e:
|
90 |
error_msg = f"Error processing video: {e.stderr.decode('utf-8', errors='ignore')}"
|
91 |
print(error_msg)
|
@@ -96,32 +110,32 @@ async def tts_interface(text, voice, rate, pitch, background_music, video_file):
|
|
96 |
speech_file, warning = await text_to_speech(text, voice, rate, pitch)
|
97 |
if warning:
|
98 |
return None, None, None, gr.Warning(warning)
|
99 |
-
|
100 |
if background_music and background_music != "":
|
101 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
102 |
output_audio = tmp_file.name
|
103 |
-
|
104 |
os.remove(speech_file)
|
105 |
speech_file = output_audio
|
106 |
-
|
107 |
# Procesar un solo video
|
108 |
video_path = video_file.name if hasattr(video_file, 'name') else video_file if video_file else None
|
109 |
audio_result, video_result = process_video_with_audio(speech_file, video_path)
|
110 |
-
|
111 |
if video_result and audio_result != audio_result:
|
112 |
os.remove(speech_file)
|
113 |
-
|
114 |
return audio_result, video_result, None
|
115 |
|
116 |
async def create_demo():
|
117 |
voices = await get_voices()
|
118 |
-
|
119 |
description = """
|
120 |
Convert text to speech with optional audio background (15% volume) and video looping.
|
121 |
-
Upload a video - it will loop to match audio duration with a fade out 3 seconds
|
122 |
Using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default.
|
123 |
"""
|
124 |
-
|
125 |
demo = gr.Interface(
|
126 |
fn=tts_interface,
|
127 |
inputs=[
|
@@ -137,7 +151,7 @@ async def create_demo():
|
|
137 |
gr.Video(label="Generated Video with Audio"),
|
138 |
gr.Markdown(label="Warning", visible=False)
|
139 |
],
|
140 |
-
title="TextSpeech, BG Music and
|
141 |
description=description,
|
142 |
article="Experience the power of Edge TTS with video integration!",
|
143 |
analytics_enabled=False,
|
|
|
18 |
return None, "Please enter text to convert."
|
19 |
if not voice:
|
20 |
return None, "Please select a voice."
|
21 |
+
|
22 |
voice_short_name = voice.split(" - ")[0]
|
23 |
rate_str = f"{rate:+d}%"
|
24 |
pitch_str = f"{pitch:+d}Hz"
|
25 |
+
|
26 |
try:
|
27 |
communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
|
28 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
|
|
34 |
except Exception as e:
|
35 |
return None, f"An error occurred: {str(e)}"
|
36 |
|
37 |
+
# Función para agregar el fondo musical al speech con separación clara entre speech y fade-out
|
38 |
+
def add_background_music_with_fade_out(speech_file, background_music_file, output_file):
|
39 |
speech = AudioSegment.from_mp3(speech_file)
|
40 |
background_music = AudioSegment.from_mp3(background_music_file)
|
41 |
background_music = background_music - 16 # Reducción aproximada para 15%
|
42 |
+
|
43 |
+
# Asegurarse de que el fondo musical sea más largo que el speech + 3 segundos de fade-out
|
44 |
+
if len(background_music) < len(speech) + 3000: # 3000 ms = 3 segundos
|
45 |
+
repetitions = math.ceil((len(speech) + 3000) / len(background_music))
|
46 |
background_music = background_music * repetitions
|
47 |
+
|
48 |
+
# Crear una pista combinada: speech seguido de 3 segundos de fade-out en el fondo musical
|
49 |
+
combined_audio = speech.overlay(background_music[:len(speech)]) # Speech con fondo musical
|
50 |
+
fade_out_segment = background_music[len(speech):len(speech) + 3000].fade_out(3000) # Fade-out de 3 segundos
|
51 |
+
final_audio = combined_audio + fade_out_segment # Speech + fade-out
|
52 |
+
|
53 |
+
# Exportar el audio final
|
54 |
final_audio.export(output_file, format="mp3")
|
55 |
print(f"Archivo generado exitosamente: {output_file}")
|
56 |
|
57 |
+
# Función para procesar y combinar un solo video con audio, incluyendo fade-out después del speech
|
58 |
def process_video_with_audio(audio_path, video_path):
|
59 |
audio = AudioSegment.from_mp3(audio_path)
|
60 |
audio_duration_ms = len(audio)
|
61 |
+
|
62 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
|
63 |
output_file = tmp_file.name
|
64 |
+
|
65 |
try:
|
66 |
if not video_path:
|
67 |
return audio_path, None
|
68 |
+
|
69 |
# Obtener duración del video
|
70 |
+
probe_command = [
|
71 |
+
"ffprobe",
|
72 |
+
"-v", "error",
|
73 |
+
"-show_entries", "format=duration",
|
74 |
+
"-of", "default=noprint_wrappers=1:nokey=1",
|
75 |
+
video_path
|
76 |
+
]
|
77 |
video_duration = float(subprocess.check_output(probe_command).decode("utf-8").strip()) * 1000
|
78 |
+
|
79 |
+
# Calcular repeticiones necesarias para el video
|
80 |
+
repetitions = math.ceil((audio_duration_ms + 3000) / video_duration) # Añadir 3 segundos para el fade-out
|
81 |
+
|
82 |
+
# Crear el comando FFmpeg para repetir el video, combinar con el audio y aplicar fade-out
|
83 |
command = [
|
84 |
"ffmpeg",
|
85 |
"-y", # Sobrescribir el archivo de salida si existe
|
|
|
88 |
"-i", audio_path, # Input del audio
|
89 |
"-c:v", "libx264", # Códec de video
|
90 |
"-c:a", "aac", # Códec de audio
|
91 |
+
"-t", str((audio_duration_ms + 3000) / 1000), # Duración total: speech + 3 segundos de fade-out
|
92 |
+
"-vf", f"fade=t=out:st={max(0, (audio_duration_ms + 3000) / 1000 - 3)}:d=3", # Fade-out del video
|
93 |
+
"-af", "afade=t=out:st={}:d=3".format(max(0, (audio_duration_ms + 3000) / 1000 - 3)), # Fade-out del audio
|
94 |
"-map", "0:v", # Mapear el stream de video
|
95 |
"-map", "1:a", # Mapear el stream de audio
|
96 |
output_file # Archivo de salida
|
97 |
]
|
98 |
+
|
99 |
# Ejecutar el comando FFmpeg
|
100 |
subprocess.run(command, check=True)
|
101 |
return audio_path, output_file
|
102 |
+
|
103 |
except subprocess.CalledProcessError as e:
|
104 |
error_msg = f"Error processing video: {e.stderr.decode('utf-8', errors='ignore')}"
|
105 |
print(error_msg)
|
|
|
110 |
speech_file, warning = await text_to_speech(text, voice, rate, pitch)
|
111 |
if warning:
|
112 |
return None, None, None, gr.Warning(warning)
|
113 |
+
|
114 |
if background_music and background_music != "":
|
115 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
116 |
output_audio = tmp_file.name
|
117 |
+
add_background_music_with_fade_out(speech_file, background_music, output_audio)
|
118 |
os.remove(speech_file)
|
119 |
speech_file = output_audio
|
120 |
+
|
121 |
# Procesar un solo video
|
122 |
video_path = video_file.name if hasattr(video_file, 'name') else video_file if video_file else None
|
123 |
audio_result, video_result = process_video_with_audio(speech_file, video_path)
|
124 |
+
|
125 |
if video_result and audio_result != audio_result:
|
126 |
os.remove(speech_file)
|
127 |
+
|
128 |
return audio_result, video_result, None
|
129 |
|
130 |
async def create_demo():
|
131 |
voices = await get_voices()
|
132 |
+
|
133 |
description = """
|
134 |
Convert text to speech with optional audio background (15% volume) and video looping.
|
135 |
+
Upload a video - it will loop to match audio duration with a fade out 3 seconds after the speech ends!
|
136 |
Using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default.
|
137 |
"""
|
138 |
+
|
139 |
demo = gr.Interface(
|
140 |
fn=tts_interface,
|
141 |
inputs=[
|
|
|
151 |
gr.Video(label="Generated Video with Audio"),
|
152 |
gr.Markdown(label="Warning", visible=False)
|
153 |
],
|
154 |
+
title="TextSpeech, BG Music and Video Repeat for YT Motivation Videos",
|
155 |
description=description,
|
156 |
article="Experience the power of Edge TTS with video integration!",
|
157 |
analytics_enabled=False,
|