# Spaces status: Runtime error
import gradio as gr | |
import torch | |
import soundfile as sf | |
import tempfile | |
from transformers import AutoModelForTextToSpeech, AutoTokenizer | |
# Checkpoint identifier shared by the tokenizer and the model loader.
# NOTE(review): confirm that the installed `transformers` release actually
# exports `AutoModelForTextToSpeech` and that this checkpoint can be loaded
# through the Auto* classes -- TODO verify, the import happens at module load.
MODEL_NAME = "hexgrad/Kokoro-82M"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the weights first, then place the model on the CPU.
# Switch the device string to "cuda" when running on a GPU.
model = AutoModelForTextToSpeech.from_pretrained(MODEL_NAME)
model = model.to("cpu")

# Voices offered in the UI dropdown; only a placeholder entry for now.
# Extend this list if the checkpoint turns out to ship several voices.
voices = ["default"]
def generate_speech(text, voice, speed, show_transcript):
    """Convert input text to speech and save it as a temporary WAV file.

    Args:
        text: Text that is tokenized and passed to the model.
        voice: Voice chosen in the UI. Accepted for interface compatibility;
            the synthesis call below does not use it.
        speed: Speech speed chosen in the UI. Accepted for interface
            compatibility; the synthesis call below does not use it.
        show_transcript: When truthy, the input text is echoed back as the
            transcript; otherwise the transcript is ``None``.

    Returns:
        A ``(wav_path, transcript)`` tuple: the path of the written WAV file
        and either ``text`` or ``None``.
    """
    inputs = tokenizer(text, return_tensors="pt").to("cpu")
    with torch.no_grad():  # inference only, no gradients needed
        speech = model.generate(**inputs)

    # Reserve the output path by actually creating the file. The previous
    # tempfile.mktemp() only returned a name, leaving a window in which another
    # process could create that path first (mktemp is deprecated for this
    # reason). delete=False keeps the file on disk after the handle closes so
    # the caller can read it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        temp_file = handle.name

    # NOTE(review): 22050 Hz is a hard-coded sample rate -- confirm it matches
    # the model's actual output rate, otherwise playback pitch/speed is off.
    sf.write(temp_file, speech.cpu().numpy(), 22050)

    # Return audio and optional transcript
    return temp_file, (text if show_transcript else None)
# Gradio UI
# Input widgets, created in the same order as generate_speech's parameters:
# (text, voice, speed, show_transcript).
_text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Type here...")
_voice_input = gr.Dropdown(choices=voices, label="Select Voice", value="default")
_speed_input = gr.Slider(
    minimum=0.5,
    maximum=2.0,
    value=1.0,
    step=0.1,
    label="Speech Speed",
)
_transcript_toggle = gr.Checkbox(label="Show Transcript", value=True)

# Output widgets, matching the (audio path, transcript) tuple that
# generate_speech returns.
_audio_output = gr.Audio(label="Generated Speech")
_transcript_output = gr.Textbox(label="Transcript", visible=True)

interface = gr.Interface(
    fn=generate_speech,
    inputs=[_text_input, _voice_input, _speed_input, _transcript_toggle],
    outputs=[_audio_output, _transcript_output],
    title="Educational Text-to-Speech",
    description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
    allow_flagging="never",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()