File size: 6,483 Bytes
1b41e6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
async def get_voices():
voices = await edge_tts.list_voices()
return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
async def text_to_speech(text, voice, rate, pitch):
if not text.strip():
return None, "Please enter text to convert."
if not voice:
return None, "Please select a voice."
voice_short_name = voice.split(" - ")[0]
rate_str = f"{rate:+d}%"
pitch_str = f"{pitch:+d}Hz"
communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
# Save directly to mp3 file (Edge TTS actually outputs mp3 format)
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
tmp_path = tmp_file.name
await communicate.save(tmp_path)
return tmp_path, None
async def tts_interface(text, voice, rate, pitch):
audio, warning = await text_to_speech(text, voice, rate, pitch)
if warning:
return audio, gr.Warning(warning)
return audio, None
async def create_demo():
voices = await get_voices()
with gr.Blocks(analytics_enabled=False) as demo:
gr.Markdown("# ποΈ Edge TTS Text-to-Speech")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("## Text-to-Speech with Microsoft Edge TTS")
gr.Markdown("""
Convert text to speech using Microsoft Edge TTS.
Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
""")
gr.HTML("""
<div style="margin: 20px 0; padding: 15px; border: 1px solid #4CAF50; border-radius: 10px; background-color: #f1f8e9;">
<p style="margin-top: 0;"><b>Looking for the new version with more features?</b></p>
<p>The new version includes:</p>
<ul>
<li><b>SRT Subtitle Support</b>: Upload SRT files or input SRT format text</li>
<li><b>File Upload</b>: Easily upload TXT or SRT files</li>
<li><b>Smart Format Detection</b>: Detects plain text or SRT format</li>
<li><b>MP3 Output</b>: Generate high-quality MP3 audio</li>
</ul>
<div style="text-align: center; margin-top: 15px;">
<a href="https://text-to-speech.wingetgui.com/" target="_blank"
style="display: inline-block;
background: linear-gradient(45deg, #4CAF50, #8BC34A);
color: white;
padding: 12px 30px;
text-decoration: none;
border-radius: 30px;
font-weight: bold;
font-size: 16px;
box-shadow: 0 4px 10px rgba(76, 175, 80, 0.3);
transition: all 0.3s ease;">Try New Version β</a>
</div>
</div>
""")
with gr.Column(scale=1):
gr.HTML("""
<div style="height: 100%; background-color: #f0f8ff; padding: 15px; border-radius: 10px;">
<h2 style="color: #1e90ff; margin-top: 0;">Turn Your Text Into Professional Videos!</h2>
<ul style="list-style-type: none; padding-left: 0;">
<li>β
<b>40+ languages and 300+ voices supported</b></li>
<li>β
<b>Custom backgrounds, music, and visual effects</b></li>
<li>β
<b>Create engaging video content from simple text</b></li>
<li>β
<b>Perfect for educators, content creators, and marketers</b></li>
</ul>
<div style="text-align: center; margin-top: 20px;">
<span style="font-size: 96px;">π¬</span>
<div style="margin-top: 15px;">
<a href="https://text2video.wingetgui.com/" target="_blank"
style="display: inline-block;
background: linear-gradient(45deg, #2196F3, #21CBF3);
color: white;
padding: 12px 30px;
text-decoration: none;
border-radius: 30px;
font-weight: bold;
font-size: 16px;
box-shadow: 0 4px 10px rgba(33, 150, 243, 0.3);
transition: all 0.3s ease;">Try Text-to-Video β</a>
</div>
</div>
</div>
""")
with gr.Row():
with gr.Column():
text_input = gr.Textbox(label="Input Text", lines=5)
voice_dropdown = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
generate_btn = gr.Button("Generate Speech", variant="primary")
audio_output = gr.Audio(label="Generated Audio", type="filepath")
warning_md = gr.Markdown(label="Warning", visible=False)
generate_btn.click(
fn=tts_interface,
inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
outputs=[audio_output, warning_md]
)
gr.Markdown("Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!")
return demo
async def main():
demo = await create_demo()
demo.queue(default_concurrency_limit=50)
demo.launch(show_api=False)
if __name__ == "__main__":
asyncio.run(main())
|