Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import openai
|
| 3 |
-
from kokoro import KPipeline
|
| 4 |
import random
|
| 5 |
import os
|
| 6 |
import torch
|
| 7 |
import time
|
| 8 |
-
from kokoro import KPipeline, KModel
|
| 9 |
|
| 10 |
# Set up the OpenAI API key (optional)
|
| 11 |
openai.api_key = None # Will be set by the user through the UI
|
|
@@ -15,7 +14,7 @@ CUDA_AVAILABLE = torch.cuda.is_available()
|
|
| 15 |
|
| 16 |
# Initialize the models and pipelines (for TTS)
|
| 17 |
models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
|
| 18 |
-
pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'abefhijpz'
|
| 19 |
# Load lexicon for specific languages
|
| 20 |
pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
|
| 21 |
pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
|
|
@@ -58,7 +57,7 @@ def translate_to_english(api_key, text, lang_code):
|
|
| 58 |
try:
|
| 59 |
prompt = f"Translate the following text from {lang_code} to English: \n\n{text}"
|
| 60 |
response = openai.ChatCompletion.create(
|
| 61 |
-
model="gpt-
|
| 62 |
messages=[{"role": "system", "content": "You are a helpful assistant that translates text."},
|
| 63 |
{"role": "user", "content": prompt}]
|
| 64 |
)
|
|
@@ -87,6 +86,80 @@ def generate_audio_from_text(text, lang_code, voice, speed, use_gpu=True):
|
|
| 87 |
raise gr.Error(e)
|
| 88 |
return (24000, audio.numpy())
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# Gradio interface setup
|
| 91 |
with gr.Blocks() as app:
|
| 92 |
gr.Markdown("### Kokoro Text-to-Speech with Translation")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import openai
|
| 3 |
+
from kokoro import KPipeline, KModel
|
| 4 |
import random
|
| 5 |
import os
|
| 6 |
import torch
|
| 7 |
import time
|
|
|
|
| 8 |
|
| 9 |
# Set up the OpenAI API key (optional)
|
| 10 |
openai.api_key = None # Will be set by the user through the UI
|
|
|
|
| 14 |
|
| 15 |
# Initialize the models and pipelines (for TTS)
|
| 16 |
models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
|
| 17 |
+
pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'abefhijpz'}
|
| 18 |
# Load lexicon for specific languages
|
| 19 |
pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
|
| 20 |
pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
|
|
|
|
| 57 |
try:
|
| 58 |
prompt = f"Translate the following text from {lang_code} to English: \n\n{text}"
|
| 59 |
response = openai.ChatCompletion.create(
|
| 60 |
+
model="gpt-4",
|
| 61 |
messages=[{"role": "system", "content": "You are a helpful assistant that translates text."},
|
| 62 |
{"role": "user", "content": prompt}]
|
| 63 |
)
|
|
|
|
| 86 |
raise gr.Error(e)
|
| 87 |
return (24000, audio.numpy())
|
| 88 |
|
| 89 |
+
# Define your available voices here in the CHOICES dictionary
|
| 90 |
+
CHOICES = {
|
| 91 |
+
'af_heart': '🇺🇸 🚺 Heart ❤️',
|
| 92 |
+
'af_bella': '🇺🇸 🚺 Bella 🔥',
|
| 93 |
+
'af_nicole': '🇺🇸 🚺 Nicole 🎧',
|
| 94 |
+
'af_aoede': '🇺🇸 🚺 Aoede',
|
| 95 |
+
'af_kore': '🇺🇸 🚺 Kore',
|
| 96 |
+
'af_sarah': '🇺🇸 🚺 Sarah',
|
| 97 |
+
'af_nova': '🇺🇸 🚺 Nova',
|
| 98 |
+
'af_sky': '🇺🇸 🚺 Sky',
|
| 99 |
+
'af_alloy': '🇺🇸 🚺 Alloy',
|
| 100 |
+
'af_jessica': '🇺🇸 🚺 Jessica',
|
| 101 |
+
'af_river': '🇺🇸 🚺 River',
|
| 102 |
+
|
| 103 |
+
'am_michael': '🇺🇸 🚹 Michael',
|
| 104 |
+
'am_fenrir': '🇺🇸 🚹 Fenrir',
|
| 105 |
+
'am_puck': '🇺🇸 🚹 Puck',
|
| 106 |
+
'am_echo': '🇺🇸 🚹 Echo',
|
| 107 |
+
'am_eric': '🇺🇸 🚹 Eric',
|
| 108 |
+
'am_liam': '🇺🇸 🚹 Liam',
|
| 109 |
+
'am_onyx': '🇺🇸 🚹 Onyx',
|
| 110 |
+
'am_santa': '🇺🇸 🚹 Santa',
|
| 111 |
+
'am_adam': '🇺🇸 🚹 Adam',
|
| 112 |
+
|
| 113 |
+
'bf_emma': '🇬🇧 🚺 Emma',
|
| 114 |
+
'bf_isabella': '🇬🇧 🚺 Isabella',
|
| 115 |
+
'bf_alice': '🇬🇧 🚺 Alice',
|
| 116 |
+
'bf_lily': '🇬🇧 🚺 Lily',
|
| 117 |
+
|
| 118 |
+
'bm_george': '🇬🇧 🚹 George',
|
| 119 |
+
'bm_fable': '🇬🇧 🚹 Fable',
|
| 120 |
+
'bm_lewis': '🇬🇧 🚹 Lewis',
|
| 121 |
+
'bm_daniel': '🇬🇧 🚹 Daniel',
|
| 122 |
+
|
| 123 |
+
'ef_dora': '🇪🇸 🚺 Dora',
|
| 124 |
+
|
| 125 |
+
'em_alex': '🇪🇸 🚹 Alex',
|
| 126 |
+
'em_santa': '🇪🇸 🚹 Santa',
|
| 127 |
+
|
| 128 |
+
'ff_siwis': '🇫🇷 🚺 Siwis',
|
| 129 |
+
|
| 130 |
+
'hf_alpha': '🇮🇳 🚺 Alpha',
|
| 131 |
+
'hf_beta': '🇮🇳 🚺 Beta',
|
| 132 |
+
|
| 133 |
+
'hm_omega': '🇮🇳 🚹 Omega',
|
| 134 |
+
'hm_psi': '🇮🇳 🚹 Psi',
|
| 135 |
+
|
| 136 |
+
'if_sara': '🇮🇹 🚺 Sara',
|
| 137 |
+
|
| 138 |
+
'im_nicola': '🇮🇹 🚹 Nicola',
|
| 139 |
+
|
| 140 |
+
'jf_alpha': '🇯🇵 🚺 Alpha',
|
| 141 |
+
'jf_gongitsune': '🇯🇵 🚺 Gongitsune',
|
| 142 |
+
'jf_nezumi': '🇯🇵 🚺 Nezumi',
|
| 143 |
+
'jf_tebukuro': '🇯🇵 🚺 Tebukuro',
|
| 144 |
+
|
| 145 |
+
'jm_kumo': '🇯🇵 🚹 Kumo',
|
| 146 |
+
|
| 147 |
+
'pf_dora': '🇧🇷 🚺 Dora',
|
| 148 |
+
|
| 149 |
+
'pm_alex': '🇧🇷 🚹 Alex',
|
| 150 |
+
'pm_santa': '🇧🇷 🚹 Santa',
|
| 151 |
+
|
| 152 |
+
'zf_xiaobei': '🇨🇳 🚺 Xiaobei',
|
| 153 |
+
'zf_xiaoni': '🇨🇳 🚺 Xiaoni',
|
| 154 |
+
'zf_xiaoxiao': '🇨🇳 🚺 Xiaoxiao',
|
| 155 |
+
'zf_xiaoyi': '🇨🇳 🚺 Xiaoyi',
|
| 156 |
+
|
| 157 |
+
'zm_yunjian': '🇨🇳 🚹 Yunjian',
|
| 158 |
+
'zm_yunxi': '🇨🇳 🚹 Yunxi',
|
| 159 |
+
'zm_yunxia': '🇨🇳 🚹 Yunxia',
|
| 160 |
+
'zm_yunyang': '🇨🇳 🚹 Yunyang'
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
# Gradio interface setup
|
| 164 |
with gr.Blocks() as app:
|
| 165 |
gr.Markdown("### Kokoro Text-to-Speech with Translation")
|