import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import math
from pydub import AudioSegment
import subprocess
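# Note: pydub and the subprocess calls below both require the ffmpeg binary to be available on PATH.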
# Fetch the available Edge TTS voices and map display names to short names
async def get_voices():
    voices = await edge_tts.list_voices()
    return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
# Convert text to speech with edge-tts
async def text_to_speech(text, voice, rate, pitch):
    if not text.strip():
        return None, "Please enter some text"
    if not voice:
        return None, "Please select a voice"
    try:
        communicate = edge_tts.Communicate(
            text,
            voice.split(" - ")[0],       # recover the ShortName, e.g. "en-US-JennyNeural"
            rate=f"{int(rate):+d}%",     # edge-tts expects a signed percentage string; int() because gr.Slider passes floats
            pitch=f"{int(pitch):+d}Hz"   # likewise, a signed Hz offset, e.g. "-5Hz"
        )
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            await communicate.save(tmp_file.name)
        return tmp_file.name, None
    except Exception as e:
        return None, f"Speech generation failed: {str(e)}"
# Mix the narration with background music and leave a 3-second musical tail
def add_background_music(speech_path, bg_music_path):
    speech = AudioSegment.from_file(speech_path)
    background = AudioSegment.from_file(bg_music_path) - 16  # attenuate background by 16 dB
    # Make sure the background covers the speech plus the 3 s fade-out
    if len(background) < len(speech) + 3000:
        background = background * math.ceil((len(speech) + 3000) / len(background))
    # Overlay the speech on the background, then append the faded tail
    combined = speech.overlay(background[:len(speech)])
    fade_out = background[len(speech):len(speech) + 3000].fade_out(3000)
    final_audio = combined + fade_out
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        final_audio.export(tmp_file.name, format="mp3")
    return tmp_file.name
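# pydub notes: "segment - 16" lowers gain by 16 dB, overlay() keeps the length of the
# base segment, and "+" concatenates, which is why the 3 s tail is appended explicitly.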
# Concatenate the uploaded videos and loop them to cover the narration
def process_videos(audio_path, video_files):
    audio_duration = AudioSegment.from_file(audio_path).duration_seconds
    # Write the list file expected by ffmpeg's concat demuxer
    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as list_file:
        # Gradio may hand over file objects or plain paths depending on version
        list_file.write("\n".join(f"file '{getattr(v, 'name', v)}'" for v in video_files))
    # Concatenate the clips without re-encoding
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as concat_video:
        subprocess.run([
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_file.name,
            "-c", "copy",
            concat_video.name
        ], check=True)
    # Loop the concatenated video over the audio and fade both out at the end
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as final_video:
        subprocess.run([
            "ffmpeg", "-y",
            "-stream_loop", "-1",
            "-i", concat_video.name,
            "-i", audio_path,
            "-t", str(audio_duration + 3),
            "-c:v", "libx264",
            "-c:a", "aac",
            "-vf", f"fade=t=out:st={audio_duration}:d=3",
            "-af", f"afade=t=out:st={audio_duration}:d=3",
            "-shortest",
            final_video.name
        ], check=True)
    return final_video.name
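# ffmpeg flags used above: "-f concat -safe 0" reads the clip list (allowing absolute paths),
# "-stream_loop -1" repeats the concatenated video indefinitely, "-t" caps the output at the
# narration length plus 3 s, and fade/afade start fading video and audio when the narration ends.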
# Main handler wired into the Gradio interface
async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
    # Generate the narration
    audio_path, warning = await text_to_speech(text, voice, rate, pitch)
    if warning:
        return None, None, gr.Warning(warning)
    try:
        # Mix in background music if one was uploaded
        if bg_music:
            speech_only = audio_path
            audio_path = add_background_music(audio_path, bg_music)
            os.remove(speech_only)  # the plain speech file is no longer needed
        # Build the looping video if clips were uploaded
        video_path = process_videos(audio_path, video_files) if video_files else None
        return audio_path, video_path, None
    except Exception as e:
        # Only clean up on failure; on success Gradio still needs the file on disk
        if os.path.exists(audio_path):
            os.remove(audio_path)
        return None, None, gr.Warning(f"Processing error: {str(e)}")
# Build the Gradio interface
async def create_demo():
    voices = await get_voices()
    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice"),
            gr.Slider(-50, 50, 0, label="Speech Rate (%)"),
            gr.Slider(-20, 20, 0, label="Pitch (Hz)"),
            gr.Audio(label="Background Music", type="filepath"),
            gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple")
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Video(label="Final Video"),
            gr.Markdown(visible=False)
        ],
        title="Multi-Video TTS with Loop",
        description="Create videos with: 1. Text-to-speech 2. Background music 3. Multiple video loop",
        examples=[
            ["Hello world! This is a test with multiple videos.",
             "en-US-JennyNeural - en-US (Female)",
             0, 0, None, None]
        ],
        css="#component-0 {max-width: 800px}"
    )
    return demo
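# create_demo is async only because the voice list has to be awaited before the UI is built.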
async def main():
    demo = await create_demo()
    demo.queue()
    demo.launch()
if __name__ == "__main__":
    asyncio.run(main())