HomeworkHelper / app.py
PeterPinetree's picture
Update app.py
867ffb1 verified
raw
history blame
1.75 kB
import gradio as gr
import torch
import soundfile as sf
import tempfile
from kokoro_onnx import Kokoro
# Create the Kokoro TTS engine once at import time; generate_speech() uses
# this module-level instance for every request.
# NOTE(review): confirm that the installed kokoro_onnx version accepts a
# no-argument constructor (i.e. needs no explicit model/voices paths).
kokoro = Kokoro()

# Ask the engine for its voice names. If this Kokoro object does not expose
# `get_voices` (AttributeError), fall back to the hard-coded list below.
try:
    voices = kokoro.get_voices()
except AttributeError:
    voices = [
        "af",
        "af_bella",
        "af_nicole",
        "af_sarah",
        "af_sky",
        "am_adam",
        "am_michael",
        "bf_emma",
        "bf_isabella",
        "bm_george",
        "bm_lewis",
    ]
def generate_speech(text, voice, speed, show_transcript):
    """Convert input text to speech using Kokoro TTS.

    Args:
        text: The text to synthesize.
        voice: Voice identifier forwarded to ``kokoro.create``.
        speed: Speech speed; converted with ``float()`` before being
            forwarded to ``kokoro.create``.
        show_transcript: When truthy, the input text is returned as the
            transcript; otherwise the transcript is ``None``.

    Returns:
        A ``(wav_path, transcript)`` tuple, where ``wav_path`` is the path of
        the temporary ``.wav`` file holding the generated audio and
        ``transcript`` is ``text`` or ``None``.
    """
    samples, sample_rate = kokoro.create(text, voice=voice, speed=float(speed))

    # Reserve the output file atomically. ``tempfile.mktemp`` is deprecated
    # because it only returns a name, leaving a window in which another
    # process can create that path first. ``delete=False`` keeps the file on
    # disk after the handle is closed so the caller can read it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name

    # Write after the handle is closed so the path is not opened twice.
    sf.write(wav_path, samples, sample_rate)

    transcript = text if show_transcript else None
    return wav_path, transcript
# Gradio UI: the input widgets are listed in the same order as the
# parameters of generate_speech (text, voice, speed, show_transcript).
_input_components = [
    gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
    gr.Dropdown(choices=voices, label="Select Voice", value=voices[0]),
    gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    gr.Checkbox(label="Show Transcript", value=True),
]

# The output widgets are listed in the same order as the tuple returned by
# generate_speech (audio file path, transcript).
_output_components = [
    gr.Audio(label="Generated Speech"),
    gr.Textbox(label="Transcript", visible=True),
]

interface = gr.Interface(
    fn=generate_speech,
    inputs=_input_components,
    outputs=_output_components,
    title="Educational Text-to-Speech",
    description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
    allow_flagging="never",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()