|
import gradio as gr |
|
import edge_tts |
|
import asyncio |
|
import tempfile |
|
import os |
|
import math |
|
from pydub import AudioSegment |
|
import subprocess |
|
|
|
|
|
async def get_voices():
    """Return the available voices keyed by a human-readable label.

    Each entry reported by ``edge_tts.list_voices()`` becomes one item whose
    key is ``"<ShortName> - <Locale> (<Gender>)"`` and whose value is the
    voice's ``ShortName``.
    """
    catalogue = await edge_tts.list_voices()

    labelled = {}
    for entry in catalogue:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
|
|
|
|
|
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* into a temporary MP3 file with edge-tts.

    Args:
        text: Text to speak. ``None``, empty, and whitespace-only values are
            rejected with a message instead of raising.
        voice: Voice label; only the part before the first ``" - "`` is
            passed to edge-tts.
        rate: Speech-rate offset in percent. Rounded to the nearest integer
            because the value is formatted as a signed integer.
        pitch: Pitch offset in Hz. Rounded to the nearest integer for the
            same reason.

    Returns:
        ``(path, None)`` on success, where *path* is the generated MP3 file,
        or ``(None, message)`` when validation or synthesis fails.
    """
    # Validate each input on its own so the message matches what is missing.
    if not text or not text.strip():
        return None, "Please enter text and select a voice"
    if not voice:
        return None, "Please select a voice"

    tmp_path = None
    try:
        communicate = edge_tts.Communicate(
            text,
            voice.split(" - ")[0],
            # UI sliders may deliver floats; the "+d" format only accepts ints.
            rate=f"{int(round(rate)):+d}%",
            pitch=f"{int(round(pitch)):+d}Hz",
        )

        # Reserve a unique path, then close the handle before the file is
        # written so no second open handle exists while saving.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name

        await communicate.save(tmp_path)
        return tmp_path, None
    except Exception as e:
        # Do not leave a partial or empty file behind when synthesis fails.
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        return None, f"Speech generation failed: {str(e)}"
|
|
|
|
|
def add_background_music(speech_path, bg_music_path, bg_reduction_db=16, tail_ms=3000):
    """Mix background music under a speech track and append a fading tail.

    The music is attenuated, repeated until it covers the speech plus the
    tail, overlaid on the speech, and then continues alone for ``tail_ms``
    milliseconds while fading out.

    Args:
        speech_path: Path of the speech audio file.
        bg_music_path: Path of the background music file.
        bg_reduction_db: Attenuation applied to the music, in dB.
        tail_ms: Length in milliseconds of the music-only fade-out appended
            after the speech. ``0`` (or less) disables the tail.

    Returns:
        Path of a newly created temporary MP3 file with the mixed audio.

    Raises:
        ValueError: If the background music contains no audio.
    """
    speech = AudioSegment.from_file(speech_path)
    background = AudioSegment.from_file(bg_music_path) - bg_reduction_db

    # An empty track cannot be looped (and would divide by zero below).
    if len(background) == 0:
        raise ValueError("Background music file contains no audio")

    speech_ms = len(speech)
    required_ms = speech_ms + max(tail_ms, 0)

    # Repeat the music until it spans the speech and the fade-out tail.
    if len(background) < required_ms:
        background = background * math.ceil(required_ms / len(background))

    final_audio = speech.overlay(background[:speech_ms])
    if tail_ms > 0:
        final_audio = final_audio + background[speech_ms:required_ms].fade_out(tail_ms)

    # Reserve a unique path and close the handle before exporting to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        out_path = tmp_file.name
    try:
        final_audio.export(out_path, format="mp3")
    except Exception:
        # Do not leave a partial file behind when the export fails.
        if os.path.exists(out_path):
            try:
                os.remove(out_path)
            except OSError:
                pass
        raise
    return out_path
|
|
|
|
|
def _reserve_temp_path(suffix):
    """Create an empty temporary file and return its path with the handle closed."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    if path and os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            pass


def _concat_entry(video):
    """Build one ffmpeg concat-list line for an upload object or a plain path."""
    if isinstance(video, (str, os.PathLike)):
        path = os.fspath(video)
    else:
        # Upload wrappers expose the on-disk path as ``.name``.
        path = video.name
    # Inside single quotes ffmpeg needs a literal quote written as '\''.
    escaped = path.replace("'", "'\\''")
    return f"file '{escaped}'"


def process_videos(audio_path, video_files):
    """Concatenate the uploaded videos, loop them, and mux in the audio.

    Args:
        audio_path: Path of the audio track to place on the video.
        video_files: Non-empty sequence of uploaded videos; each item is
            either an object whose ``.name`` is the file path or a path
            itself.

    Returns:
        Path of a newly created temporary MP4 file.

    Raises:
        ValueError: If ``video_files`` is empty.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    if not video_files:
        raise ValueError("At least one video file is required")

    audio_duration = AudioSegment.from_file(audio_path).duration_seconds

    list_path = concat_path = final_path = None
    try:
        list_path = _reserve_temp_path(".txt")
        concat_path = _reserve_temp_path(".mp4")
        final_path = _reserve_temp_path(".mp4")

        # The list must be fully written and closed before ffmpeg reads it.
        with open(list_path, "w", encoding="utf-8") as list_file:
            list_file.write("\n".join(_concat_entry(v) for v in video_files))

        # Step 1: join the clips without re-encoding.
        subprocess.run([
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_path,
            "-c", "copy",
            concat_path,
        ], check=True)

        # Step 2: loop the joined clip under the audio and re-encode.
        # NOTE(review): with -shortest the output presumably ends when the
        # audio does, so fades starting at audio_duration may never be
        # visible - confirm the intended behaviour.
        subprocess.run([
            "ffmpeg", "-y",
            "-stream_loop", "-1",
            "-i", concat_path,
            "-i", audio_path,
            "-t", str(audio_duration + 3),
            "-c:v", "libx264",
            "-c:a", "aac",
            "-vf", f"fade=t=out:st={audio_duration}:d=3",
            "-af", f"afade=t=out:st={audio_duration}:d=3",
            "-shortest",
            final_path,
        ], check=True)
    except Exception:
        # A failed run must not leave a partial output file behind.
        _remove_quietly(final_path)
        raise
    finally:
        # Intermediate artefacts are never needed by the caller.
        _remove_quietly(list_path)
        _remove_quietly(concat_path)

    return final_path
|
|
|
|
|
async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
    """Run the full pipeline: speech, optional music mix, optional video.

    Args:
        text: Text to synthesize.
        voice: Selected voice label.
        rate: Speech-rate offset in percent.
        pitch: Pitch offset in Hz.
        bg_music: Path of a background music file, or a falsy value for none.
        video_files: Uploaded videos, or a falsy value for none.

    Returns:
        ``(audio_path, video_path, message)``. On success ``message`` is
        ``None`` and ``video_path`` is ``None`` when no videos were given.
        On failure both paths are ``None`` and the third item is the result
        of ``gr.Warning(...)``.
    """
    speech_path, warning = await text_to_speech(text, voice, rate, pitch)
    if warning:
        return None, None, gr.Warning(warning)

    audio_path = speech_path
    video_path = None
    try:
        if bg_music:
            audio_path = add_background_music(speech_path, bg_music)

        if video_files:
            video_path = process_videos(audio_path, video_files)
    except Exception as e:
        # Nothing is returned on failure, so drop every audio file produced.
        for leftover in {speech_path, audio_path}:
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError:
                    pass
        return None, None, gr.Warning(f"Processing error: {str(e)}")

    # The raw speech file is only an intermediate once music has been mixed in.
    if audio_path != speech_path and os.path.exists(speech_path):
        try:
            os.remove(speech_path)
        except OSError:
            pass

    # The returned files are intentionally kept on disk: the caller still
    # has to read them after this function returns.
    return audio_path, video_path, None
|
|
|
|
|
async def create_demo():
    """Build the Gradio interface for the text-to-speech video tool.

    The voice dropdown is populated from ``get_voices()``, with an empty
    first choice.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    voices = await get_voices()

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
            gr.Dropdown(choices=[""] + list(voices), label="Select Voice"),
            # step=1 keeps both sliders on whole numbers; the synthesis step
            # formats these values as signed integers.
            gr.Slider(-50, 50, 0, step=1, label="Speech Rate (%)"),
            gr.Slider(-20, 20, 0, step=1, label="Pitch (Hz)"),
            gr.Audio(label="Background Music", type="filepath"),
            gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple"),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Video(label="Final Video"),
            # Hidden slot for the third value returned by tts_interface.
            gr.Markdown(visible=False),
        ],
        title="Multi-Video TTS with Loop",
        description="Create videos with: 1. Text-to-speech 2. Background music 3. Multiple video loop",
        examples=[
            ["Hello world! This is a test with multiple videos.",
             "en-US-JennyNeural - en-US (Female)",
             0, 0, None, None],
        ],
        css="#component-0 {max-width: 800px}",
    )
    return demo
|
|
|
async def main():
    """Build the demo, enable its request queue, and launch it."""
    app = await create_demo()
    app.queue()
    app.launch()
|
|
|
# Script entry point: run the async main() when executed directly.
if __name__ == "__main__":
    asyncio.run(main())