PeterPinetree committed on
Commit
867ffb1
·
verified ·
1 Parent(s): ab3b679

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -21
app.py CHANGED
@@ -1,29 +1,50 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
 
 
4
 
5
- # Load Kokoro TTS Model
6
- device = "cuda" if torch.cuda.is_available() else "cpu"
7
- model_name = "hexgrad/Kokoro-82M"
8
- model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name).to(device)
9
- processor = AutoProcessor.from_pretrained(model_name)
10
 
11
- def text_to_speech(text):
12
- """Convert input text to speech using Kokoro TTS"""
13
- inputs = processor(text, return_tensors="pt").to(device)
14
- with torch.no_grad():
15
- output = model.generate(**inputs)
16
- return output.cpu().numpy()
 
 
17
 
18
- # Gradio Interface
19
- description = "Enter text and listen to the Kokoro TTS model read it aloud."
 
 
 
 
 
 
 
 
20
 
21
- demo = gr.Interface(
22
- fn=text_to_speech,
23
- inputs=gr.Textbox(placeholder="Type something here..."),
24
- outputs=gr.Audio(type="numpy"),
25
- title="Kokoro TTS - Text-to-Speech",
26
- description=description,
 
 
 
 
 
 
 
 
 
 
27
  )
28
 
29
- demo.launch()
 
 
 
1
  import gradio as gr
2
  import torch
3
+ import soundfile as sf
4
+ import tempfile
5
+ from kokoro_onnx import Kokoro
6
 
7
+ # Load Kokoro TTS Model (NOTE(review): kokoro_onnx's Kokoro appears to require model and voices file paths; confirm this no-argument call works)
8
+ kokoro = Kokoro()
 
 
 
9
 
10
+ # Fetch available voices dynamically (if supported)
11
+ try:
12
+ voices = kokoro.get_voices() # If `get_voices()` exists, use it
13
+ except AttributeError:
14
+ # Default voice list if `get_voices()` isn't available
15
+ voices = ['af', 'af_bella', 'af_nicole', 'af_sarah', 'af_sky',
16
+ 'am_adam', 'am_michael', 'bf_emma', 'bf_isabella',
17
+ 'bm_george', 'bm_lewis']
18
 
19
def generate_speech(text, voice, speed, show_transcript):
    """Convert input text to speech using Kokoro TTS.

    Args:
        text: The text to synthesize.
        voice: Voice identifier forwarded to ``kokoro.create``.
        speed: Speech speed; coerced to ``float`` before use.
        show_transcript: When truthy, ``text`` is echoed back as the
            transcript; otherwise the transcript is ``None``.

    Returns:
        A ``(wav_path, transcript)`` tuple, where ``wav_path`` is the path
        of the WAV file written for this request.
    """
    samples, sample_rate = kokoro.create(text, voice=voice, speed=float(speed))

    # tempfile.mktemp() only invents a name and is deprecated: another
    # process can claim the path before we write to it. Creating the file
    # up front reserves the name atomically; delete=False keeps it on disk
    # after the handle is closed so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name

    # The handle is closed before writing so the path can be reopened on
    # every platform.
    sf.write(wav_path, samples, sample_rate)

    # Return audio and optional transcript
    transcript = text if show_transcript else None
    return wav_path, transcript
29
 
30
+ # Gradio UI
31
+ interface = gr.Interface(
32
+ fn=generate_speech,
33
+ inputs=[
34
+ gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
35
+ gr.Dropdown(choices=voices, label="Select Voice", value=voices[0]),
36
+ gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
37
+ gr.Checkbox(label="Show Transcript", value=True)
38
+ ],
39
+ outputs=[
40
+ gr.Audio(label="Generated Speech"),
41
+ gr.Textbox(label="Transcript", visible=True)
42
+ ],
43
+ title="Educational Text-to-Speech",
44
+ description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
45
+ allow_flagging="never"
46
  )
47
 
48
+ # Launch the app
49
+ if __name__ == "__main__":
50
+ interface.launch()