gnosticdev commited on
Commit
dc23696
verified
1 Parent(s): 505d2e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -112
app.py CHANGED
@@ -7,24 +7,20 @@ import edge_tts
7
  import gradio as gr
8
  from pydub import AudioSegment
9
 
10
- # Configuración de Logs
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
 
13
- # CONSTANTES DE ARCHIVOS
14
  INTRO_VIDEO = "introvideo.mp4"
15
  OUTRO_VIDEO = "outrovideo.mp4"
16
  MUSIC_BG = "musicafondo.mp3"
17
  EJEMPLO_VIDEO = "ejemplo.mp4"
18
 
19
- # Validar existencia de archivos
20
  for file in [INTRO_VIDEO, OUTRO_VIDEO, MUSIC_BG, EJEMPLO_VIDEO]:
21
  if not os.path.exists(file):
22
  logging.error(f"Falta archivo necesario: {file}")
23
  raise FileNotFoundError(f"Falta: {file}")
24
 
25
- # Configuración de chunks
26
- SEGMENT_DURATION = 30 # Duración exacta entre transiciones (sin overlap)
27
- TRANSITION_DURATION = 1.5 # Duración del efecto slide
28
 
29
  def eliminar_archivo_tiempo(ruta, delay=1800):
30
  def eliminar():
@@ -44,11 +40,8 @@ async def generar_tts(texto, voz, duracion_total):
44
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_tts:
45
  await communicate.save(tmp_tts.name)
46
  tts_audio = AudioFileClip(tmp_tts.name)
47
-
48
- # Asegurar que el TTS no exceda la duración del video
49
  if tts_audio.duration > duracion_total:
50
  tts_audio = tts_audio.subclip(0, duracion_total)
51
-
52
  return tts_audio, tmp_tts.name
53
  except Exception as e:
54
  logging.error(f"Fallo en TTS: {str(e)}")
@@ -60,25 +53,19 @@ def crear_musica_fondo(duracion_total):
60
  repeticiones = needed_ms // len(bg_music) + 1
61
  bg_music = bg_music * repeticiones
62
  bg_music = bg_music[:needed_ms].fade_out(1000)
63
-
64
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_bg:
65
  bg_music.export(tmp_bg.name, format="mp3")
66
  return AudioFileClip(tmp_bg.name).volumex(0.15), tmp_bg.name
67
 
68
  def create_slide_transition(clip1, clip2, duration=TRANSITION_DURATION):
69
- """Transición slide con movimiento más pronunciado"""
70
- # Tomar la última parte del clip1 y la primera parte del clip2
71
  part1 = clip1.subclip(clip1.duration - duration)
72
  part2 = clip2.subclip(0, duration)
73
-
74
- # Crear animación de deslizamiento
75
  transition = CompositeVideoClip([
76
  part1.fx(vfx.fadeout, duration),
77
  part2.fx(vfx.fadein, duration).set_position(
78
- lambda t: ('center', 720 - (720 * (t/duration))) # Movimiento desde abajo
79
  )
80
  ], size=(1280, 720)).set_duration(duration)
81
-
82
  return transition
83
 
84
  async def procesar_video(video_input, texto_tts, voz_seleccionada):
@@ -88,70 +75,38 @@ async def procesar_video(video_input, texto_tts, voz_seleccionada):
88
  logging.info("Iniciando procesamiento")
89
  video_original = VideoFileClip(video_input, target_resolution=(720, 1280))
90
  duracion_video = video_original.duration
91
-
92
- # Generar TTS y música de fondo
93
  tts_audio, tts_path = await generar_tts(texto_tts, voz_seleccionada, duracion_video)
94
  bg_audio, bg_path = crear_musica_fondo(duracion_video)
95
  temp_files.extend([tts_path, bg_path])
96
-
97
- # Combinar audios
98
  audio_original = video_original.audio.volumex(0.7) if video_original.audio else None
99
  audios = [bg_audio.set_duration(duracion_video)]
100
  if audio_original:
101
  audios.append(audio_original)
102
  audios.append(tts_audio.set_start(0).volumex(0.85))
103
  audio_final = CompositeAudioClip(audios).set_duration(duracion_video)
104
-
105
- # CORRECCIÓN: Simplificar la creación de segmentos y transiciones
106
- # Dividir el video en segmentos exactos de 30 segundos y aplicar transiciones solo en esos puntos
107
-
108
- # Crear un único clip con todo el contenido, con transiciones solo cada 30 segundos
109
  video_final = video_original.copy()
110
-
111
- # Si es necesario, realizar cortes y transiciones cada 30 segundos
112
  if duracion_video > SEGMENT_DURATION:
113
  clips = []
114
  num_segments = int(duracion_video // SEGMENT_DURATION) + (1 if duracion_video % SEGMENT_DURATION > 0 else 0)
115
-
116
  for i in range(num_segments):
117
  start_time = i * SEGMENT_DURATION
118
  end_time = min(start_time + SEGMENT_DURATION, duracion_video)
119
-
120
- # Obtener segmento actual
121
  segment = video_original.subclip(start_time, end_time)
122
-
123
- # Para el primer segmento, solo añadirlo al resultado
124
  if i == 0:
125
  clips.append(segment)
126
  else:
127
- # Para los demás segmentos, añadir transición
128
  prev_segment = clips[-1]
129
- # Crear transición entre segmentos
130
  transition = create_slide_transition(prev_segment, segment)
131
-
132
- # Recortar el segmento anterior para que termine justo antes de la transición
133
  prev_end = prev_segment.duration - TRANSITION_DURATION
134
  if prev_end > 0:
135
  clips[-1] = prev_segment.subclip(0, prev_end)
136
-
137
- # Añadir la transición
138
  clips.append(transition)
139
-
140
- # Añadir el segmento actual, empezando después de la transición
141
  clips.append(segment)
142
-
143
- # Combinar todos los clips
144
  video_final = concatenate_videoclips(clips, method="compose")
145
-
146
- # Establecer el audio final
147
  video_final = video_final.set_audio(audio_final)
148
-
149
- # Agregar intro y outro
150
  intro = VideoFileClip(INTRO_VIDEO, target_resolution=(720, 1280))
151
  outro = VideoFileClip(OUTRO_VIDEO, target_resolution=(720, 1280))
152
  video_final = concatenate_videoclips([intro, video_final, outro], method="compose")
153
-
154
- # Renderizado final
155
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
156
  video_final.write_videofile(
157
  tmp.name,
@@ -171,7 +126,6 @@ async def procesar_video(video_input, texto_tts, voz_seleccionada):
171
  eliminar_archivo_tiempo(tmp.name, 1800)
172
  logging.info(f"Video final guardado: {tmp.name}")
173
  return tmp.name
174
-
175
  except Exception as e:
176
  logging.error(f"Fallo general: {str(e)}")
177
  raise
@@ -191,10 +145,8 @@ async def procesar_video(video_input, texto_tts, voz_seleccionada):
191
  except Exception as e:
192
  logging.warning(f"Error al cerrar recursos: {str(e)}")
193
 
194
- # Interfaz Gradio
195
  with gr.Blocks() as demo:
196
  gr.Markdown("# Editor de Video con IA")
197
-
198
  with gr.Tab("Principal"):
199
  video_input = gr.Video(label="Subir video")
200
  texto_tts = gr.Textbox(
@@ -203,75 +155,73 @@ with gr.Blocks() as demo:
203
  placeholder="Escribe aqu铆 tu texto..."
204
  )
205
  voz_seleccionada = gr.Dropdown(
206
- label="Voz",
207
- choices=[
208
- "es-ES-AlvaroNeural", "es-MX-BeatrizNeural",
209
- "es-ES-ElviraNeural", "es-MX-JavierNeural",
210
- "es-AR-ElenaNeural", "es-AR-TomasNeural",
211
- "es-CL-CatalinaNeural", "es-CL-LorenzoNeural",
212
- "es-CO-SofiaNeural", "es-CO-GonzaloNeural",
213
- "es-PE-CamilaNeural", "es-PE-AlexNeural",
214
- "es-VE-MariaNeural", "es-VE-ManuelNeural",
215
- "es-US-AlonsoNeural", "es-US-PalomaNeural",
216
- "es-ES-AbrilNeural", "es-ES-DarioNeural",
217
- "es-ES-HelenaRUS", "es-ES-LauraNeural",
218
- "es-ES-PabloNeural", "es-ES-TriniNeural",
219
- "en-US-AriaNeural", "en-US-GuyNeural",
220
- "en-US-JennyNeural", "en-US-AmberNeural",
221
- "en-US-AnaNeural", "en-US-AshleyNeural",
222
- "en-US-BrandonNeural", "en-US-ChristopherNeural",
223
- "en-US-CoraNeural", "en-US-DavisNeural",
224
- "en-US-ElizabethNeural", "en-US-EricNeural",
225
- "en-US-GinaNeural", "en-US-JacobNeural",
226
- "en-US-JaneNeural", "en-US-JasonNeural",
227
- "en-US-MichelleNeural", "en-US-MonicaNeural",
228
- "en-US-SaraNeural", "en-US-SteffanNeural",
229
- "en-US-TonyNeural", "en-US-YaraNeural",
230
- "fr-FR-AlainNeural", "fr-FR-BrigitteNeural",
231
- "fr-FR-CelesteNeural", "fr-FR-ClaudeNeural",
232
- "fr-FR-CoralieNeural", "fr-FR-DeniseNeural",
233
- "fr-FR-EloiseNeural", "fr-FR-HenriNeural",
234
- "fr-FR-JacquelineNeural", "fr-FR-JeromeNeural",
235
- "fr-FR-JosephineNeural", "fr-FR-MauriceNeural",
236
- "fr-FR-YvesNeural", "fr-FR-YvetteNeural",
237
- "de-DE-AmalaNeural", "de-DE-BerndNeural",
238
- "de-DE-ChristophNeural", "de-DE-ConradNeural",
239
- "de-DE-ElkeNeural", "de-DE-GiselaNeural",
240
- "de-DE-KasperNeural", "de-DE-KatjaNeural",
241
- "de-DE-KillianNeural", "de-DE-KlarissaNeural",
242
- "de-DE-KlausNeural", "de-DE-LouisaNeural",
243
- "de-DE-MajaNeural", "de-DE-RalfNeural",
244
- "de-DE-TanjaNeural", "de-DE-ViktoriaNeural",
245
- "it-IT-BenignoNeural", "it-IT-CalimeroNeural",
246
- "it-IT-CataldoNeural", "it-IT-DiegoNeural",
247
- "it-IT-ElsaNeural", "it-IT-FabiolaNeural",
248
- "it-IT-GianniNeural", "it-IT-ImeldaNeural",
249
- "it-IT-IrmaNeural", "it-IT-IsabellaNeural",
250
- "it-IT-LisandroNeural", "it-IT-PalmiraNeural",
251
- "it-IT-PierinaNeural", "it-IT-RinaldoNeural",
252
- "ja-JP-AoiNeural", "ja-JP-DaichiNeural",
253
- "ja-JP-HarukaNeural", "ja-JP-KeitaNeural",
254
- "ja-JP-MayuNeural", "ja-JP-NanamiNeural",
255
- "ja-JP-NaokiNeural", "ja-JP-ShioriNeural"
256
- ],
257
- value="es-ES-AlvaroNeural"
258
- )
259
  procesar_btn = gr.Button("Generar Video")
260
  video_output = gr.Video(label="Video Procesado")
261
-
262
  with gr.Accordion("Ejemplos de Uso", open=False):
263
  gr.Examples(
264
  examples=[[EJEMPLO_VIDEO, "隆Hola! Esto es una prueba. Suscr铆bete al canal."]],
265
  inputs=[video_input, texto_tts],
266
  label="Ejemplos"
267
  )
268
-
269
  procesar_btn.click(
270
  procesar_video,
271
  inputs=[video_input, texto_tts, voz_seleccionada],
272
  outputs=video_output
273
  )
274
- # Información adicional en pie de página
275
  gr.Markdown("""
276
  ### 鈩癸笍 Notas importantes:
277
  - Las transiciones ocurren solamente cada 30 segundos
@@ -280,10 +230,5 @@ with gr.Blocks() as demo:
280
  - Para mejores resultados, usa videos de dimensiones 720p o 1080p
281
  """)
282
 
283
- procesar_btn.click(
284
- procesar_video,
285
- inputs=[video_input, texto_tts, voz_seleccionada],
286
- outputs=video_output
287
- )
288
  if __name__ == "__main__":
289
  demo.queue().launch()
 
7
  import gradio as gr
8
  from pydub import AudioSegment
9
 
 
10
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
11
 
 
12
  INTRO_VIDEO = "introvideo.mp4"
13
  OUTRO_VIDEO = "outrovideo.mp4"
14
  MUSIC_BG = "musicafondo.mp3"
15
  EJEMPLO_VIDEO = "ejemplo.mp4"
16
 
 
17
  for file in [INTRO_VIDEO, OUTRO_VIDEO, MUSIC_BG, EJEMPLO_VIDEO]:
18
  if not os.path.exists(file):
19
  logging.error(f"Falta archivo necesario: {file}")
20
  raise FileNotFoundError(f"Falta: {file}")
21
 
22
+ SEGMENT_DURATION = 30
23
+ TRANSITION_DURATION = 1.5
 
24
 
25
  def eliminar_archivo_tiempo(ruta, delay=1800):
26
  def eliminar():
 
40
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_tts:
41
  await communicate.save(tmp_tts.name)
42
  tts_audio = AudioFileClip(tmp_tts.name)
 
 
43
  if tts_audio.duration > duracion_total:
44
  tts_audio = tts_audio.subclip(0, duracion_total)
 
45
  return tts_audio, tmp_tts.name
46
  except Exception as e:
47
  logging.error(f"Fallo en TTS: {str(e)}")
 
53
  repeticiones = needed_ms // len(bg_music) + 1
54
  bg_music = bg_music * repeticiones
55
  bg_music = bg_music[:needed_ms].fade_out(1000)
 
56
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_bg:
57
  bg_music.export(tmp_bg.name, format="mp3")
58
  return AudioFileClip(tmp_bg.name).volumex(0.15), tmp_bg.name
59
 
60
  def create_slide_transition(clip1, clip2, duration=TRANSITION_DURATION):
 
 
61
  part1 = clip1.subclip(clip1.duration - duration)
62
  part2 = clip2.subclip(0, duration)
 
 
63
  transition = CompositeVideoClip([
64
  part1.fx(vfx.fadeout, duration),
65
  part2.fx(vfx.fadein, duration).set_position(
66
+ lambda t: ('center', 720 - (720 * (t/duration)))
67
  )
68
  ], size=(1280, 720)).set_duration(duration)
 
69
  return transition
70
 
71
  async def procesar_video(video_input, texto_tts, voz_seleccionada):
 
75
  logging.info("Iniciando procesamiento")
76
  video_original = VideoFileClip(video_input, target_resolution=(720, 1280))
77
  duracion_video = video_original.duration
 
 
78
  tts_audio, tts_path = await generar_tts(texto_tts, voz_seleccionada, duracion_video)
79
  bg_audio, bg_path = crear_musica_fondo(duracion_video)
80
  temp_files.extend([tts_path, bg_path])
 
 
81
  audio_original = video_original.audio.volumex(0.7) if video_original.audio else None
82
  audios = [bg_audio.set_duration(duracion_video)]
83
  if audio_original:
84
  audios.append(audio_original)
85
  audios.append(tts_audio.set_start(0).volumex(0.85))
86
  audio_final = CompositeAudioClip(audios).set_duration(duracion_video)
 
 
 
 
 
87
  video_final = video_original.copy()
 
 
88
  if duracion_video > SEGMENT_DURATION:
89
  clips = []
90
  num_segments = int(duracion_video // SEGMENT_DURATION) + (1 if duracion_video % SEGMENT_DURATION > 0 else 0)
 
91
  for i in range(num_segments):
92
  start_time = i * SEGMENT_DURATION
93
  end_time = min(start_time + SEGMENT_DURATION, duracion_video)
 
 
94
  segment = video_original.subclip(start_time, end_time)
 
 
95
  if i == 0:
96
  clips.append(segment)
97
  else:
 
98
  prev_segment = clips[-1]
 
99
  transition = create_slide_transition(prev_segment, segment)
 
 
100
  prev_end = prev_segment.duration - TRANSITION_DURATION
101
  if prev_end > 0:
102
  clips[-1] = prev_segment.subclip(0, prev_end)
 
 
103
  clips.append(transition)
 
 
104
  clips.append(segment)
 
 
105
  video_final = concatenate_videoclips(clips, method="compose")
 
 
106
  video_final = video_final.set_audio(audio_final)
 
 
107
  intro = VideoFileClip(INTRO_VIDEO, target_resolution=(720, 1280))
108
  outro = VideoFileClip(OUTRO_VIDEO, target_resolution=(720, 1280))
109
  video_final = concatenate_videoclips([intro, video_final, outro], method="compose")
 
 
110
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
111
  video_final.write_videofile(
112
  tmp.name,
 
126
  eliminar_archivo_tiempo(tmp.name, 1800)
127
  logging.info(f"Video final guardado: {tmp.name}")
128
  return tmp.name
 
129
  except Exception as e:
130
  logging.error(f"Fallo general: {str(e)}")
131
  raise
 
145
  except Exception as e:
146
  logging.warning(f"Error al cerrar recursos: {str(e)}")
147
 
 
148
  with gr.Blocks() as demo:
149
  gr.Markdown("# Editor de Video con IA")
 
150
  with gr.Tab("Principal"):
151
  video_input = gr.Video(label="Subir video")
152
  texto_tts = gr.Textbox(
 
155
  placeholder="Escribe aqu铆 tu texto..."
156
  )
157
  voz_seleccionada = gr.Dropdown(
158
+ label="Voz",
159
+ choices=[
160
+ "es-ES-AlvaroNeural", "es-MX-BeatrizNeural",
161
+ "es-ES-ElviraNeural", "es-MX-JavierNeural",
162
+ "es-AR-ElenaNeural", "es-AR-TomasNeural",
163
+ "es-CL-CatalinaNeural", "es-CL-LorenzoNeural",
164
+ "es-CO-SofiaNeural", "es-CO-GonzaloNeural",
165
+ "es-PE-CamilaNeural", "es-PE-AlexNeural",
166
+ "es-VE-MariaNeural", "es-VE-ManuelNeural",
167
+ "es-US-AlonsoNeural", "es-US-PalomaNeural",
168
+ "es-ES-AbrilNeural", "es-ES-DarioNeural",
169
+ "es-ES-HelenaRUS", "es-ES-LauraNeural",
170
+ "es-ES-PabloNeural", "es-ES-TriniNeural",
171
+ "en-US-AriaNeural", "en-US-GuyNeural",
172
+ "en-US-JennyNeural", "en-US-AmberNeural",
173
+ "en-US-AnaNeural", "en-US-AshleyNeural",
174
+ "en-US-BrandonNeural", "en-US-ChristopherNeural",
175
+ "en-US-CoraNeural", "en-US-DavisNeural",
176
+ "en-US-ElizabethNeural", "en-US-EricNeural",
177
+ "en-US-GinaNeural", "en-US-JacobNeural",
178
+ "en-US-JaneNeural", "en-US-JasonNeural",
179
+ "en-US-MichelleNeural", "en-US-MonicaNeural",
180
+ "en-US-SaraNeural", "en-US-SteffanNeural",
181
+ "en-US-TonyNeural", "en-US-YaraNeural",
182
+ "fr-FR-AlainNeural", "fr-FR-BrigitteNeural",
183
+ "fr-FR-CelesteNeural", "fr-FR-ClaudeNeural",
184
+ "fr-FR-CoralieNeural", "fr-FR-DeniseNeural",
185
+ "fr-FR-EloiseNeural", "fr-FR-HenriNeural",
186
+ "fr-FR-JacquelineNeural", "fr-FR-JeromeNeural",
187
+ "fr-FR-JosephineNeural", "fr-FR-MauriceNeural",
188
+ "fr-FR-YvesNeural", "fr-FR-YvetteNeural",
189
+ "de-DE-AmalaNeural", "de-DE-BerndNeural",
190
+ "de-DE-ChristophNeural", "de-DE-ConradNeural",
191
+ "de-DE-ElkeNeural", "de-DE-GiselaNeural",
192
+ "de-DE-KasperNeural", "de-DE-KatjaNeural",
193
+ "de-DE-KillianNeural", "de-DE-KlarissaNeural",
194
+ "de-DE-KlausNeural", "de-DE-LouisaNeural",
195
+ "de-DE-MajaNeural", "de-DE-RalfNeural",
196
+ "de-DE-TanjaNeural", "de-DE-ViktoriaNeural",
197
+ "it-IT-BenignoNeural", "it-IT-CalimeroNeural",
198
+ "it-IT-CataldoNeural", "it-IT-DiegoNeural",
199
+ "it-IT-ElsaNeural", "it-IT-FabiolaNeural",
200
+ "it-IT-GianniNeural", "it-IT-ImeldaNeural",
201
+ "it-IT-IrmaNeural", "it-IT-IsabellaNeural",
202
+ "it-IT-LisandroNeural", "it-IT-PalmiraNeural",
203
+ "it-IT-PierinaNeural", "it-IT-RinaldoNeural",
204
+ "ja-JP-AoiNeural", "ja-JP-DaichiNeural",
205
+ "ja-JP-HarukaNeural", "ja-JP-KeitaNeural",
206
+ "ja-JP-MayuNeural", "ja-JP-NanamiNeural",
207
+ "ja-JP-NaokiNeural", "ja-JP-ShioriNeural"
208
+ ],
209
+ value="es-ES-AlvaroNeural"
210
+ )
211
  procesar_btn = gr.Button("Generar Video")
212
  video_output = gr.Video(label="Video Procesado")
 
213
  with gr.Accordion("Ejemplos de Uso", open=False):
214
  gr.Examples(
215
  examples=[[EJEMPLO_VIDEO, "隆Hola! Esto es una prueba. Suscr铆bete al canal."]],
216
  inputs=[video_input, texto_tts],
217
  label="Ejemplos"
218
  )
 
219
  procesar_btn.click(
220
  procesar_video,
221
  inputs=[video_input, texto_tts, voz_seleccionada],
222
  outputs=video_output
223
  )
224
+
225
  gr.Markdown("""
226
  ### 鈩癸笍 Notas importantes:
227
  - Las transiciones ocurren solamente cada 30 segundos
 
230
  - Para mejores resultados, usa videos de dimensiones 720p o 1080p
231
  """)
232
 
 
 
 
 
 
233
  if __name__ == "__main__":
234
  demo.queue().launch()