Spaces:

gnosticdev
/

Creador-de-videos-con-imagen

Running

App Files Files Community

Creador-de-videos-con-imagen / app.py

gnosticdev

Update app.py

209bf9f verified 5 months ago

raw

history blame

5.44 kB

	import gradio as gr
	import edge_tts
	import asyncio
	import tempfile
	import os
	import math
	from pydub import AudioSegment
	import subprocess

	# Build the label -> voice-id mapping offered in the voice dropdown
	async def get_voices():
	    """Fetch the edge-tts voice list and key it by a display label.

	    Returns:
	        A dict mapping "<ShortName> - <Locale> (<Gender>)" to the voice's
	        ShortName, one entry per voice reported by edge_tts.list_voices().
	    """
	    catalogue = await edge_tts.list_voices()
	    labelled = {}
	    for entry in catalogue:
	        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
	        labelled[label] = entry['ShortName']
	    return labelled

	# Text-to-speech conversion
	async def text_to_speech(text, voice, rate, pitch):
	    """Synthesize *text* into a temporary MP3 file using edge-tts.

	    Args:
	        text: Text to speak. None, empty and whitespace-only values all
	            count as missing.
	        voice: Voice label; only the part before the first " - " is passed
	            to edge-tts as the voice name.
	        rate: Speech-rate offset in percent (int or float).
	        pitch: Pitch offset in Hz (int or float).

	    Returns:
	        (path, None) on success, or (None, message) when input is missing
	        or synthesis fails. This function never raises for those cases.
	    """
	    has_text = bool(text and text.strip())
	    if not has_text and not voice:
	        return None, "Please enter text and select a voice"
	    if not has_text:
	        return None, "Please enter text"
	    if not voice:
	        return None, "Please select a voice"

	    output_path = None
	    try:
	        # Reserve a path and close the handle before edge-tts writes to it.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	            output_path = tmp_file.name

	        communicate = edge_tts.Communicate(
	            text,
	            voice.split(" - ")[0],
	            # The "+d" format rejects floats, so coerce slider values first.
	            rate=f"{int(round(rate)):+d}%",
	            pitch=f"{int(round(pitch)):+d}Hz",
	        )
	        await communicate.save(output_path)
	    except Exception as e:
	        # Do not leave a partial or empty file behind on failure.
	        if output_path and os.path.exists(output_path):
	            os.remove(output_path)
	        return None, f"Speech generation failed: {str(e)}"
	    return output_path, None

	# Mix background music under the speech track
	def add_background_music(speech_path, bg_music_path):
	    """Overlay background music on a speech recording and export an MP3.

	    The music is attenuated by 16 dB, looped if needed so that it covers the
	    speech plus a 3-second tail, mixed under the speech, and then the tail is
	    appended with a 3-second fade-out.

	    Args:
	        speech_path: Path of the speech audio file.
	        bg_music_path: Path of the background music file.

	    Returns:
	        Path of a new temporary MP3 with the mix. If the background track
	        decodes to zero length there is nothing to mix, and speech_path is
	        returned unchanged.
	    """
	    fade_ms = 3000
	    speech = AudioSegment.from_file(speech_path)
	    background = AudioSegment.from_file(bg_music_path) - 16  # 16 dB quieter (~15% volume)

	    # An empty track cannot be looped (division by zero below); skip mixing.
	    if len(background) == 0:
	        return speech_path

	    # Make sure the music lasts at least as long as the speech + fade tail.
	    needed_ms = len(speech) + fade_ms
	    if len(background) < needed_ms:
	        background = background * math.ceil(needed_ms / len(background))

	    # Music under the speech, followed by the music-only fade-out tail.
	    combined = speech.overlay(background[:len(speech)])
	    fade_out = background[len(speech):needed_ms].fade_out(fade_ms)
	    final_audio = combined + fade_out

	    # Reserve the path and close the handle before pydub writes to it.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	        output_path = tmp_file.name
	    try:
	        final_audio.export(output_path, format="mp3")
	    except Exception:
	        # Do not leak the reserved file when the export fails.
	        if os.path.exists(output_path):
	            os.remove(output_path)
	        raise
	    return output_path

	# Process multiple videos
	def _concat_list_line(video):
	    """Return one ffmpeg concat-list line for an uploaded video.

	    Accepts either an object with a ``name`` attribute or a plain path
	    string, and escapes single quotes so the quoted path stays valid.
	    """
	    path = str(getattr(video, "name", video))
	    return "file '{}'".format(path.replace("'", "'\\''"))


	def _remove_if_exists(path):
	    """Delete *path*, ignoring the case where it is already gone."""
	    try:
	        os.remove(path)
	    except FileNotFoundError:
	        pass


	def process_videos(audio_path, video_files):
	    """Concatenate the uploaded videos, loop them, and mux in the audio.

	    Args:
	        audio_path: Path of the audio track for the final video.
	        video_files: Uploaded videos, as objects exposing ``name`` or as
	            plain path strings.

	    Returns:
	        Path of the final temporary MP4.

	    Raises:
	        subprocess.CalledProcessError: If either ffmpeg invocation fails.
	    """
	    audio_duration = AudioSegment.from_file(audio_path).duration_seconds

	    # Reserve all temp paths up front; handles are closed before ffmpeg runs.
	    with tempfile.NamedTemporaryFile(
	        mode="w", delete=False, suffix=".txt", encoding="utf-8"
	    ) as list_file:
	        list_file.write("\n".join(_concat_list_line(v) for v in video_files))
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as concat_video:
	        pass
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as final_video:
	        pass

	    try:
	        # Step 1: join the clips without re-encoding.
	        subprocess.run([
	            "ffmpeg", "-y",
	            "-f", "concat",
	            "-safe", "0",
	            "-i", list_file.name,
	            "-c", "copy",
	            concat_video.name
	        ], check=True)

	        # Step 2: loop the joined clip under the audio and encode.
	        # NOTE(review): with -shortest the output presumably ends when the
	        # audio does, so fades starting at audio_duration may never be
	        # visible -- confirm the intended timing.
	        subprocess.run([
	            "ffmpeg", "-y",
	            "-stream_loop", "-1",
	            "-i", concat_video.name,
	            "-i", audio_path,
	            "-t", str(audio_duration + 3),
	            "-c:v", "libx264",
	            "-c:a", "aac",
	            "-vf", "fade=t=out:st={}:d=3".format(audio_duration),
	            "-af", "afade=t=out:st={}:d=3".format(audio_duration),
	            "-shortest",
	            final_video.name
	        ], check=True)
	    except Exception:
	        # A failed run must not leave a broken output file behind.
	        _remove_if_exists(final_video.name)
	        raise
	    finally:
	        # Intermediates are never needed once ffmpeg has finished.
	        _remove_if_exists(list_file.name)
	        _remove_if_exists(concat_video.name)

	    return final_video.name

	# Main entry point
	async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
	    """Run the full pipeline: speech, optional music mix, optional video.

	    Args:
	        text: Text to synthesize.
	        voice: Voice label selected in the dropdown.
	        rate: Speech-rate offset in percent.
	        pitch: Pitch offset in Hz.
	        bg_music: Path of a background music file, or a falsy value for none.
	        video_files: Uploaded videos, or a falsy value for audio-only output.

	    Returns:
	        (audio_path, video_path, None) on success; video_path is None when
	        no videos were supplied. On invalid input or a processing error,
	        (None, None, gr.Warning(message)).
	    """
	    def _discard(path):
	        if path and os.path.exists(path):
	            os.remove(path)

	    # Generate the main speech audio.
	    speech_path, warning = await text_to_speech(text, voice, rate, pitch)
	    if warning:
	        return None, None, gr.Warning(warning)

	    audio_path = speech_path
	    try:
	        # Add background music.
	        if bg_music:
	            audio_path = add_background_music(speech_path, bg_music)

	        # Build the video.
	        video_path = process_videos(audio_path, video_files) if video_files else None
	    except Exception as e:
	        # Nothing is returned on failure, so every temp audio file can go.
	        _discard(speech_path)
	        if audio_path != speech_path:
	            _discard(audio_path)
	        return None, None, gr.Warning(f"Processing error: {str(e)}")

	    # The returned audio file must survive so it can be served; only the raw
	    # speech file is dropped once a mixed track has replaced it.
	    if audio_path != speech_path:
	        _discard(speech_path)
	    return audio_path, video_path, None

	# Build the user interface
	async def create_demo():
	    """Construct the Gradio interface wired to tts_interface.

	    The voice dropdown is filled from get_voices(), preceded by an empty
	    choice.

	    Returns:
	        The configured gr.Interface (not yet launched).
	    """
	    voice_labels = list((await get_voices()).keys())

	    # Components are created in the same order as they appear on the page.
	    input_components = [
	        gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
	        gr.Dropdown(choices=[""] + voice_labels, label="Select Voice"),
	        gr.Slider(-50, 50, 0, label="Speech Rate (%)"),
	        gr.Slider(-20, 20, 0, label="Pitch (Hz)"),
	        gr.Audio(label="Background Music", type="filepath"),
	        gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple"),
	    ]
	    output_components = [
	        gr.Audio(label="Generated Audio", type="filepath"),
	        gr.Video(label="Final Video"),
	        gr.Markdown(visible=False),
	    ]
	    sample_rows = [
	        [
	            "Hello world! This is a test with multiple videos.",
	            "en-US-JennyNeural - en-US (Female)",
	            0,
	            0,
	            None,
	            None,
	        ],
	    ]

	    return gr.Interface(
	        fn=tts_interface,
	        inputs=input_components,
	        outputs=output_components,
	        title="Multi-Video TTS with Loop",
	        description="Create videos with: 1. Text-to-speech 2. Background music 3. Multiple video loop",
	        examples=sample_rows,
	        css="#component-0 {max-width: 800px}",
	    )

	async def main():
	    """Build the interface, enable its queue, and launch it."""
	    app = await create_demo()
	    app.queue()
	    app.launch()


	# Script entry point: run the async main() to completion.
	if __name__ == "__main__":
	    asyncio.run(main())