Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import openai
|
| 3 |
-
from kokoro import KPipeline
|
| 4 |
import random
|
| 5 |
import os
|
| 6 |
import torch
|
| 7 |
import time
|
| 8 |
-
from kokoro import KPipeline, KModel
|
| 9 |
|
| 10 |
# Set up the OpenAI API key (optional)
|
| 11 |
openai.api_key = None # Will be set by the user through the UI
|
|
@@ -15,7 +14,7 @@ CUDA_AVAILABLE = torch.cuda.is_available()
|
|
| 15 |
|
| 16 |
# Initialize the models and pipelines (for TTS)
|
| 17 |
models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
|
| 18 |
-
pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'abefhijpz'
|
| 19 |
# Load lexicon for specific languages
|
| 20 |
pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
|
| 21 |
pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
|
|
@@ -58,7 +57,7 @@ def translate_to_english(api_key, text, lang_code):
|
|
| 58 |
try:
|
| 59 |
prompt = f"Translate the following text from {lang_code} to English: \n\n{text}"
|
| 60 |
response = openai.ChatCompletion.create(
|
| 61 |
-
model="gpt-
|
| 62 |
messages=[{"role": "system", "content": "You are a helpful assistant that translates text."},
|
| 63 |
{"role": "user", "content": prompt}]
|
| 64 |
)
|
|
@@ -87,6 +86,80 @@ def generate_audio_from_text(text, lang_code, voice, speed, use_gpu=True):
|
|
| 87 |
raise gr.Error(e)
|
| 88 |
return (24000, audio.numpy())
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# Gradio interface setup
|
| 91 |
with gr.Blocks() as app:
|
| 92 |
gr.Markdown("### Kokoro Text-to-Speech with Translation")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import openai
|
| 3 |
+
from kokoro import KPipeline, KModel
|
| 4 |
import random
|
| 5 |
import os
|
| 6 |
import torch
|
| 7 |
import time
|
|
|
|
| 8 |
|
| 9 |
# Set up the OpenAI API key (optional)
|
| 10 |
openai.api_key = None # Will be set by the user through the UI
|
|
|
|
| 14 |
|
| 15 |
# Initialize the models and pipelines (for TTS)
|
| 16 |
models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
|
| 17 |
+
pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'abefhijpz'}
|
| 18 |
# Load lexicon for specific languages
|
| 19 |
pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
|
| 20 |
pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
|
|
|
|
| 57 |
try:
|
| 58 |
prompt = f"Translate the following text from {lang_code} to English: \n\n{text}"
|
| 59 |
response = openai.ChatCompletion.create(
|
| 60 |
+
model="gpt-4",
|
| 61 |
messages=[{"role": "system", "content": "You are a helpful assistant that translates text."},
|
| 62 |
{"role": "user", "content": prompt}]
|
| 63 |
)
|
|
|
|
| 86 |
raise gr.Error(e)
|
| 87 |
return (24000, audio.numpy())
|
| 88 |
|
| 89 |
+
# Define your available voices here in the CHOICES dictionary
|
| 90 |
+
CHOICES = {
|
| 91 |
+
'af_heart': '🇺🇸 🚺 Heart ❤️',
|
| 92 |
+
'af_bella': '🇺🇸 🚺 Bella 🔥',
|
| 93 |
+
'af_nicole': '🇺🇸 🚺 Nicole 🎧',
|
| 94 |
+
'af_aoede': '🇺🇸 🚺 Aoede',
|
| 95 |
+
'af_kore': '🇺🇸 🚺 Kore',
|
| 96 |
+
'af_sarah': '🇺🇸 🚺 Sarah',
|
| 97 |
+
'af_nova': '🇺🇸 🚺 Nova',
|
| 98 |
+
'af_sky': '🇺🇸 🚺 Sky',
|
| 99 |
+
'af_alloy': '🇺🇸 🚺 Alloy',
|
| 100 |
+
'af_jessica': '🇺🇸 🚺 Jessica',
|
| 101 |
+
'af_river': '🇺🇸 🚺 River',
|
| 102 |
+
|
| 103 |
+
'am_michael': '🇺🇸 🚹 Michael',
|
| 104 |
+
'am_fenrir': '🇺🇸 🚹 Fenrir',
|
| 105 |
+
'am_puck': '🇺🇸 🚹 Puck',
|
| 106 |
+
'am_echo': '🇺🇸 🚹 Echo',
|
| 107 |
+
'am_eric': '🇺🇸 🚹 Eric',
|
| 108 |
+
'am_liam': '🇺🇸 🚹 Liam',
|
| 109 |
+
'am_onyx': '🇺🇸 🚹 Onyx',
|
| 110 |
+
'am_santa': '🇺🇸 🚹 Santa',
|
| 111 |
+
'am_adam': '🇺🇸 🚹 Adam',
|
| 112 |
+
|
| 113 |
+
'bf_emma': '🇬🇧 🚺 Emma',
|
| 114 |
+
'bf_isabella': '🇬🇧 🚺 Isabella',
|
| 115 |
+
'bf_alice': '🇬🇧 🚺 Alice',
|
| 116 |
+
'bf_lily': '🇬🇧 🚺 Lily',
|
| 117 |
+
|
| 118 |
+
'bm_george': '🇬🇧 🚹 George',
|
| 119 |
+
'bm_fable': '🇬🇧 🚹 Fable',
|
| 120 |
+
'bm_lewis': '🇬🇧 🚹 Lewis',
|
| 121 |
+
'bm_daniel': '🇬🇧 🚹 Daniel',
|
| 122 |
+
|
| 123 |
+
'ef_dora': '🇪🇸 🚺 Dora',
|
| 124 |
+
|
| 125 |
+
'em_alex': '🇪🇸 🚹 Alex',
|
| 126 |
+
'em_santa': '🇪🇸 🚹 Santa',
|
| 127 |
+
|
| 128 |
+
'ff_siwis': '🇫🇷 🚺 Siwis',
|
| 129 |
+
|
| 130 |
+
'hf_alpha': '🇮🇳 🚺 Alpha',
|
| 131 |
+
'hf_beta': '🇮🇳 🚺 Beta',
|
| 132 |
+
|
| 133 |
+
'hm_omega': '🇮🇳 🚹 Omega',
|
| 134 |
+
'hm_psi': '🇮🇳 🚹 Psi',
|
| 135 |
+
|
| 136 |
+
'if_sara': '🇮🇹 🚺 Sara',
|
| 137 |
+
|
| 138 |
+
'im_nicola': '🇮🇹 🚹 Nicola',
|
| 139 |
+
|
| 140 |
+
'jf_alpha': '🇯🇵 🚺 Alpha',
|
| 141 |
+
'jf_gongitsune': '🇯🇵 🚺 Gongitsune',
|
| 142 |
+
'jf_nezumi': '🇯🇵 🚺 Nezumi',
|
| 143 |
+
'jf_tebukuro': '🇯🇵 🚺 Tebukuro',
|
| 144 |
+
|
| 145 |
+
'jm_kumo': '🇯🇵 🚹 Kumo',
|
| 146 |
+
|
| 147 |
+
'pf_dora': '🇧🇷 🚺 Dora',
|
| 148 |
+
|
| 149 |
+
'pm_alex': '🇧🇷 🚹 Alex',
|
| 150 |
+
'pm_santa': '🇧🇷 🚹 Santa',
|
| 151 |
+
|
| 152 |
+
'zf_xiaobei': '🇨🇳 🚺 Xiaobei',
|
| 153 |
+
'zf_xiaoni': '🇨🇳 🚺 Xiaoni',
|
| 154 |
+
'zf_xiaoxiao': '🇨🇳 🚺 Xiaoxiao',
|
| 155 |
+
'zf_xiaoyi': '🇨🇳 🚺 Xiaoyi',
|
| 156 |
+
|
| 157 |
+
'zm_yunjian': '🇨🇳 🚹 Yunjian',
|
| 158 |
+
'zm_yunxi': '🇨🇳 🚹 Yunxi',
|
| 159 |
+
'zm_yunxia': '🇨🇳 🚹 Yunxia',
|
| 160 |
+
'zm_yunyang': '🇨🇳 🚹 Yunyang'
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
# Gradio interface setup
|
| 164 |
with gr.Blocks() as app:
|
| 165 |
gr.Markdown("### Kokoro Text-to-Speech with Translation")
|