"""Gradio front end that turns typed text into speech with Kokoro TTS."""

import tempfile
from typing import Optional, Tuple

import gradio as gr
import soundfile as sf
import torch  # noqa: F401 -- not referenced in this script; kept from the original imports
from kokoro_onnx import Kokoro

# Voice identifiers used when the Kokoro object cannot list its own voices.
DEFAULT_VOICES = [
    'af', 'af_bella', 'af_nicole', 'af_sarah', 'af_sky',
    'am_adam', 'am_michael',
    'bf_emma', 'bf_isabella',
    'bm_george', 'bm_lewis',
]

# Load Kokoro TTS model.
# NOTE(review): the constructor is called without arguments, as in the
# original script. Confirm the installed kokoro_onnx version supports this;
# some releases may expect explicit model/voices file paths.
kokoro = Kokoro()

# Fetch available voices dynamically (if supported).
try:
    voices = kokoro.get_voices()  # If `get_voices()` exists, use it
except AttributeError:
    # Default voice list if `get_voices()` isn't available
    voices = list(DEFAULT_VOICES)


def generate_speech(
    text: str,
    voice: str,
    speed: float,
    show_transcript: bool,
) -> Tuple[str, Optional[str]]:
    """Convert input text to speech using Kokoro TTS.

    Args:
        text: The text to synthesize.
        voice: Voice identifier passed through to ``kokoro.create``.
        speed: Speech-rate multiplier; coerced to ``float`` before use.
        show_transcript: When true, the input text is echoed back as the
            transcript; otherwise the transcript slot is ``None``.

    Returns:
        A ``(wav_path, transcript)`` tuple: the path of the temporary WAV
        file holding the generated audio, and the transcript or ``None``.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of handing the
        # synthesizer an empty string.
        raise gr.Error("Please enter some text to convert to speech.")

    samples, sample_rate = kokoro.create(text, voice=voice, speed=float(speed))

    # Reserve the file securely instead of using the deprecated, race-prone
    # tempfile.mktemp(). The handle is closed before soundfile reopens the
    # path by name, and delete=False keeps the file so it can be served.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, samples, sample_rate)

    # Return audio and optional transcript.
    transcript = text if show_transcript else None
    return wav_path, transcript


# Gradio UI
interface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
        gr.Dropdown(
            choices=voices,
            label="Select Voice",
            # Guard against an empty voice list so building the UI does not
            # fail with IndexError.
            value=voices[0] if voices else None,
        ),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
        gr.Checkbox(label="Show Transcript", value=True),
    ],
    outputs=[
        gr.Audio(label="Generated Speech"),
        gr.Textbox(label="Transcript", visible=True),
    ],
    title="Educational Text-to-Speech",
    description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
    allow_flagging="never",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()