PeterPinetree committed on
Commit
6112cdd
·
verified ·
1 Parent(s): 688f7e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -17
app.py CHANGED
@@ -2,27 +2,25 @@ import gradio as gr
2
  import torch
3
  import soundfile as sf
4
  import tempfile
5
- from kokoro_onnx import Kokoro
6
 
7
- # Load Kokoro TTS Model (No need for external files)
8
- kokoro = Kokoro()
 
 
9
 
10
- # Fetch available voices dynamically (if supported)
11
- try:
12
- voices = kokoro.get_voices() # If `get_voices()` exists, use it
13
- except AttributeError:
14
- # Default voice list if `get_voices()` isn't available
15
- voices = ['af', 'af_bella', 'af_nicole', 'af_sarah', 'af_sky',
16
- 'am_adam', 'am_michael', 'bf_emma', 'bf_isabella',
17
- 'bm_george', 'bm_lewis']
18
 
19
  def generate_speech(text, voice, speed, show_transcript):
20
- """Convert input text to speech using Kokoro TTS"""
21
- samples, sample_rate = kokoro.create(text, voice=voice, speed=float(speed))
22
-
23
- # Save audio file temporarily
 
 
24
  temp_file = tempfile.mktemp(suffix=".wav")
25
- sf.write(temp_file, samples, sample_rate)
26
 
27
  # Return audio and optional transcript
28
  return temp_file, text if show_transcript else None
@@ -32,7 +30,7 @@ interface = gr.Interface(
32
  fn=generate_speech,
33
  inputs=[
34
  gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
35
- gr.Dropdown(choices=voices, label="Select Voice", value=voices[0]),
36
  gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
37
  gr.Checkbox(label="Show Transcript", value=True)
38
  ],
@@ -48,3 +46,4 @@ interface = gr.Interface(
48
  # Launch the app
49
  if __name__ == "__main__":
50
  interface.launch()
 
 
2
  import torch
3
  import soundfile as sf
4
  import tempfile
5
+ from transformers import AutoModelForTextToSpeech, AutoTokenizer
6
 
7
+ # Load Kokoro-82M Model
8
+ MODEL_NAME = "hexgrad/Kokoro-82M"
9
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
+ model = AutoModelForTextToSpeech.from_pretrained(MODEL_NAME).to("cpu") # Change to "cuda" if running on GPU
11
 
12
+ # Define available voices (Check if Kokoro-82M has predefined voices)
13
+ voices = ['default'] # Modify if multiple voices exist
 
 
 
 
 
 
14
 
15
  def generate_speech(text, voice, speed, show_transcript):
16
+ """Convert input text to speech using Kokoro-82M"""
17
+ inputs = tokenizer(text, return_tensors="pt").to("cpu")
18
+ with torch.no_grad():
19
+ speech = model.generate(**inputs)
20
+
21
+ # Save the generated speech as a file
22
  temp_file = tempfile.mktemp(suffix=".wav")
23
+ sf.write(temp_file, speech.cpu().numpy(), 22050) # Adjust sample rate if necessary
24
 
25
  # Return audio and optional transcript
26
  return temp_file, text if show_transcript else None
 
30
  fn=generate_speech,
31
  inputs=[
32
  gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
33
+ gr.Dropdown(choices=voices, label="Select Voice", value='default'),
34
  gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
35
  gr.Checkbox(label="Show Transcript", value=True)
36
  ],
 
46
  # Launch the app
47
  if __name__ == "__main__":
48
  interface.launch()
49
+