File size: 1,775 Bytes
1269a6e
 
867ffb1
 
6112cdd
1269a6e
6112cdd
 
 
 
1269a6e
6112cdd
 
1269a6e
867ffb1
6112cdd
 
 
 
 
 
867ffb1
6112cdd
867ffb1
 
 
1269a6e
867ffb1
 
 
 
 
6112cdd
867ffb1
 
 
 
 
 
 
 
 
 
1269a6e
 
867ffb1
 
 
6112cdd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
import torch
import soundfile as sf
import tempfile
from transformers import AutoModelForTextToSpeech, AutoTokenizer

# Identifier handed to from_pretrained() for both the tokenizer and the model
MODEL_NAME = "hexgrad/Kokoro-82M"

# Load the tokenizer and the TTS model once at import time, then place the
# model on the CPU (switch to "cuda" when running on a GPU).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTextToSpeech.from_pretrained(MODEL_NAME)
model = model.to("cpu")

# Voice names offered to the user; extend this list if Kokoro-82M turns out
# to ship several predefined voices.
voices = ['default']

def generate_speech(text, voice, speed, show_transcript):
    """Convert input text to speech using Kokoro-82M.

    Args:
        text: Text to synthesize; it is also what gets echoed back as the
            transcript.
        voice: Voice chosen in the UI. Accepted for interface compatibility
            but not currently used by this function.
        speed: Speech speed chosen in the UI. Accepted for interface
            compatibility but not currently used by this function.
        show_transcript: When truthy, the input text is returned as the
            transcript; otherwise the transcript is ``None``.

    Returns:
        A ``(wav_path, transcript)`` tuple, where ``wav_path`` is the path of
        the WAV file that was written and ``transcript`` is ``text`` or
        ``None``.
    """
    inputs = tokenizer(text, return_tensors="pt").to("cpu")
    with torch.no_grad():
        speech = model.generate(**inputs)

    # Create the output file atomically instead of using tempfile.mktemp(),
    # which only returns a name and lets another process claim it first.
    # delete=False keeps the file on disk after the handle is closed so it
    # can be written by path and then served to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name
    sf.write(wav_path, speech.cpu().numpy(), 22050)  # Adjust sample rate if necessary

    # Return audio and optional transcript
    return wav_path, (text if show_transcript else None)

# Gradio UI
# Input components, listed in the same order as generate_speech's parameters
# (text, voice, speed, show_transcript).
_input_components = [
    gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
    gr.Dropdown(choices=voices, label="Select Voice", value='default'),
    gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    gr.Checkbox(label="Show Transcript", value=True),
]

# Output components, matching the (audio file, transcript) pair that
# generate_speech returns.
_output_components = [
    gr.Audio(label="Generated Speech"),
    gr.Textbox(label="Transcript", visible=True),
]

interface = gr.Interface(
    fn=generate_speech,
    inputs=_input_components,
    outputs=_output_components,
    title="Educational Text-to-Speech",
    description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
    allow_flagging="never",
)

# Start the web app only when this file is executed as a script
if __name__ == "__main__":
    interface.launch()