File size: 4,396 Bytes
63f1d6d 299c2df 63f1d6d 299c2df 63f1d6d 299c2df 63f1d6d 8529fe9 63f1d6d 8529fe9 63f1d6d 299c2df 8529fe9 299c2df 5f7fd2a 299c2df 5f7fd2a d81bde6 63f1d6d d81bde6 ff4d2d2 d81bde6 63f1d6d 299c2df 5f7fd2a 63f1d6d 299c2df 63f1d6d d81bde6 63f1d6d 4f5115c 63f1d6d 8529fe9 63f1d6d 299c2df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from pydub import AudioSegment
from pydub.playback import play
import math
# Build the catalogue of available voices
async def get_voices():
    """Fetch the Edge TTS voices and index them by a human-readable label.

    Returns:
        A dict mapping ``"<ShortName> - <Locale> (<Gender>)"`` to the
        voice's ``ShortName``, one entry per voice returned by
        ``edge_tts.list_voices()``.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        short_name = entry["ShortName"]
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
# Main text-to-speech conversion function
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* with Edge TTS and save it to a temporary MP3 file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text is
            rejected with a warning message.
        voice: Voice label of the form ``"<ShortName> - ..."``; only the
            part before the first ``" - "`` is passed to Edge TTS.
        rate: Speech-rate adjustment, formatted as a signed percentage.
        pitch: Pitch adjustment, formatted as a signed Hz value.

    Returns:
        ``(path, None)`` on success, where *path* is the generated MP3
        (the caller is responsible for deleting it), or
        ``(None, message)`` when validation fails.
    """
    # `not text` also covers None, which has no .strip().
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # Coerce to int first: the "+d" format spec raises ValueError for
    # float values such as 5.0.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Only reserve a unique path here; the handle is closed before the
    # audio is written to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    saved = False
    try:
        await communicate.save(tmp_path)
        saved = True
    finally:
        if not saved:
            # delete=False means nothing else will remove the file if the
            # synthesis fails or is cancelled.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
    return tmp_path, None
# Mix background music underneath a speech track
def add_background_music(speech_file, background_music_file, output_file):
    """Overlay attenuated, looped background music on a speech recording.

    The music is lowered by 16 dB, repeated until it is at least as long
    as the speech, trimmed to the speech length, and overlaid on it. The
    result is exported as MP3 and a confirmation line is printed.

    Args:
        speech_file: Path to the speech audio, decoded as MP3.
        background_music_file: Path to the music file. The format is
            detected by the decoder, so it does not have to be MP3.
        output_file: Path the mixed MP3 is written to.
    """
    speech = AudioSegment.from_mp3(speech_file)
    # from_file() without an explicit format lets the decoder detect it;
    # from_mp3() would force MP3 decoding on files that may not be MP3.
    background_music = AudioSegment.from_file(background_music_file)

    if len(background_music) > 0:
        # -16 dB is roughly 15% of the original amplitude.
        background_music = background_music - 16

        # Loop the music when it is shorter than the speech.
        if len(background_music) < len(speech):
            repetitions = math.ceil(len(speech) / len(background_music))
            background_music = background_music * repetitions

        # Trim so the music ends together with the speech.
        background_music = background_music[:len(speech)]
        final_audio = speech.overlay(background_music)
    else:
        # A zero-length music track would divide by zero in the repetition
        # count; there is nothing to mix, so export the speech alone.
        final_audio = speech

    final_audio.export(output_file, format="mp3")
    print(f"Archivo generado exitosamente: {output_file}")
# Gradio callback
async def tts_interface(text, voice, rate, pitch, background_music):
    """Generate speech and optionally mix background music under it.

    Args:
        text: Text to convert.
        voice: Voice label selected in the dropdown.
        rate: Speech-rate adjustment forwarded to ``text_to_speech``.
        pitch: Pitch adjustment forwarded to ``text_to_speech``.
        background_music: Path to a music file, or a falsy value
            (``None`` / ``""``) when no music was supplied.

    Returns:
        A 3-tuple for the three output components. The first item is the
        path of the resulting audio file (or ``None`` on a validation
        failure); the second is always ``None``; the third is the result
        of ``gr.Warning(...)`` on a validation failure and ``None``
        otherwise.
    """
    speech_file, warning = await text_to_speech(text, voice, rate, pitch)
    if warning:
        return None, None, gr.Warning(warning)

    # No background music: hand back the plain speech file.
    if not background_music:
        return speech_file, None, None

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        output_file = tmp_file.name

    try:
        # add_background_music is synchronous; run it in the default
        # executor so this coroutine does not block the event loop.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None, add_background_music, speech_file, background_music, output_file
        )
    except BaseException:
        # Both temp files were created with delete=False, so remove them
        # here before re-raising; otherwise a failed mix leaks them.
        for leftover in (speech_file, output_file):
            try:
                os.remove(leftover)
            except OSError:
                pass
        raise

    # The un-mixed speech file is no longer needed.
    os.remove(speech_file)
    return output_file, None, None
async def create_demo():
    """Build the Gradio interface for the text-to-speech app.

    Fetches the voice labels via ``get_voices()`` and wires
    ``tts_interface`` to five inputs (text, voice, rate, pitch, background
    music) and three outputs (audio plus two hidden components).

    Returns:
        The configured ``gr.Interface`` instance (not yet launched).
    """
    voice_labels = await get_voices()

    # Assembled from pieces; the resulting text, including its leading and
    # trailing newline, is unchanged.
    blurb = (
        "\nConvert text to speech with audio background to 15% volumen, perfect for "
        "audiobooks or youtube videos ! using Microsoft Edge TTS. Adjust speech rate "
        "and pitch: 0 is default, positive values increase, negative values decrease.\n"
    )

    # Inputs, in the positional order expected by tts_interface.
    text_box = gr.Textbox(label="Input Text", lines=5)
    # The leading empty choice is the initial "nothing selected" value.
    voice_picker = gr.Dropdown(choices=["", *voice_labels], label="Select Voice", value="")
    rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
    pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
    # Created without the 'optional' argument.
    music_upload = gr.Audio(label="Background Music", type="filepath")

    # Outputs, matching the 3-tuple returned by tts_interface.
    audio_out = gr.Audio(label="Generated Audio", type="filepath")
    hidden_image = gr.Image(label="Visualization", visible=False)
    hidden_warning = gr.Markdown(label="Warning", visible=False)

    return gr.Interface(
        fn=tts_interface,
        inputs=[text_box, voice_picker, rate_slider, pitch_slider, music_upload],
        outputs=[audio_out, hidden_image, hidden_warning],
        title="Edge TTS Text-to-Speech",
        description=blurb,
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name=None,
    )
async def main():
    """Build the interface, configure its queue, and launch it.

    The queue is set up with ``default_concurrency_limit=5`` and the app
    is launched with ``show_api=False``.
    """
    app = await create_demo()
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)
# Script entry point: run the async main() to completion.
if __name__ == "__main__":
    asyncio.run(main())