# HomeworkHelper / app.py
# PeterPinetree's picture
# Update app.py
# 6112cdd verified
import gradio as gr
import torch
import soundfile as sf
import tempfile
from transformers import AutoModelForTextToSpeech, AutoTokenizer
# Kokoro-82M checkpoint: the tokenizer and model are loaded once, at import time.
# NOTE(review): confirm that the installed transformers release actually
# provides AutoModelForTextToSpeech and can load this checkpoint.
MODEL_NAME = "hexgrad/Kokoro-82M"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Load the weights first, then place the model on the CPU
# (switch to "cuda" when running on a GPU).
model = AutoModelForTextToSpeech.from_pretrained(MODEL_NAME)
model = model.to("cpu")
# Voice names offered by the UI dropdown. Only a placeholder entry for now;
# extend it once the checkpoint's predefined voices have been checked.
voices = ["default"]
def generate_speech(text, voice, speed, show_transcript):
    """Convert input text to speech and return it as a WAV file path.

    Args:
        text: The text to synthesize.
        voice: Voice name selected in the UI. Accepted so the signature
            matches the interface inputs; it is not forwarded to the model.
        speed: Speech-speed multiplier selected in the UI. Accepted so the
            signature matches the interface inputs; it is not forwarded to
            the model.
        show_transcript: When truthy, ``text`` is echoed back as the
            transcript; otherwise the transcript is ``None``.

    Returns:
        A ``(wav_path, transcript)`` tuple. ``wav_path`` is the path of a
        temporary ``.wav`` file containing the generated audio. Blank or
        missing ``text`` yields ``(None, None)`` without calling the model.
    """
    # Nothing to synthesize: skip tokenization and generation entirely.
    if not text or not text.strip():
        return None, None

    # Follow the model's device instead of hard-coding "cpu", so moving the
    # model to a GPU does not leave the inputs behind on the CPU.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        speech = model.generate(**inputs)

    # tempfile.mktemp() only invents a name, which leaves a window for another
    # process to claim it. NamedTemporaryFile creates the file atomically;
    # delete=False keeps it on disk after the handle is closed so the caller
    # can still read it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name

    # NOTE(review): 22050 Hz is carried over unchanged; Kokoro's published
    # examples appear to use 24000 Hz - confirm against the model card.
    # NOTE(review): assumes generate() returns a (frames,) or
    # (frames, channels) tensor; a leading batch axis would need squeezing.
    sf.write(wav_path, speech.cpu().numpy(), 22050)

    transcript = text if show_transcript else None
    return wav_path, transcript
# Gradio UI. The input components are listed in the same order as the
# parameters of generate_speech, and the output components in the same order
# as the values it returns.
_input_components = [
    gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
    gr.Dropdown(choices=voices, label="Select Voice", value="default"),
    gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    gr.Checkbox(label="Show Transcript", value=True),
]
_output_components = [
    gr.Audio(label="Generated Speech"),
    gr.Textbox(label="Transcript", visible=True),
]
interface = gr.Interface(
    fn=generate_speech,
    inputs=_input_components,
    outputs=_output_components,
    title="Educational Text-to-Speech",
    description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
    allow_flagging="never",
)
# Launch the interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    interface.launch()