shukdevdatta123 commited on
Commit
da8e3d2
·
verified ·
1 Parent(s): 9def98f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -4
app.py CHANGED
@@ -1,11 +1,10 @@
1
  import gradio as gr
2
  import openai
3
- from kokoro import KPipeline
4
  import random
5
  import os
6
  import torch
7
  import time
8
- from kokoro import KPipeline, KModel
9
 
10
  # Set up the OpenAI API key (optional)
11
  openai.api_key = None # Will be set by the user through the UI
@@ -15,7 +14,7 @@ CUDA_AVAILABLE = torch.cuda.is_available()
15
 
16
  # Initialize the models and pipelines (for TTS)
17
  models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
18
- pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'abefhijpz'}
19
  # Load lexicon for specific languages
20
  pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
21
  pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
@@ -58,7 +57,7 @@ def translate_to_english(api_key, text, lang_code):
58
  try:
59
  prompt = f"Translate the following text from {lang_code} to English: \n\n{text}"
60
  response = openai.ChatCompletion.create(
61
- model="gpt-4o",
62
  messages=[{"role": "system", "content": "You are a helpful assistant that translates text."},
63
  {"role": "user", "content": prompt}]
64
  )
@@ -87,6 +86,80 @@ def generate_audio_from_text(text, lang_code, voice, speed, use_gpu=True):
87
  raise gr.Error(e)
88
  return (24000, audio.numpy())
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  # Gradio interface setup
91
  with gr.Blocks() as app:
92
  gr.Markdown("### Kokoro Text-to-Speech with Translation")
 
1
  import gradio as gr
2
  import openai
3
+ from kokoro import KPipeline, KModel
4
  import random
5
  import os
6
  import torch
7
  import time
 
8
 
9
  # Set up the OpenAI API key (optional)
10
  openai.api_key = None # Will be set by the user through the UI
 
14
 
15
  # Initialize the models and pipelines (for TTS)
16
  models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
17
+ pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'abefhijpz'}
18
  # Load lexicon for specific languages
19
  pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
20
  pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
 
57
  try:
58
  prompt = f"Translate the following text from {lang_code} to English: \n\n{text}"
59
  response = openai.ChatCompletion.create(
60
+ model="gpt-4",
61
  messages=[{"role": "system", "content": "You are a helpful assistant that translates text."},
62
  {"role": "user", "content": prompt}]
63
  )
 
86
  raise gr.Error(e)
87
  return (24000, audio.numpy())
88
 
89
+ # Define your available voices here in the CHOICES dictionary
90
+ CHOICES = {
91
+ 'af_heart': '🇺🇸 🚺 Heart ❤️',
92
+ 'af_bella': '🇺🇸 🚺 Bella 🔥',
93
+ 'af_nicole': '🇺🇸 🚺 Nicole 🎧',
94
+ 'af_aoede': '🇺🇸 🚺 Aoede',
95
+ 'af_kore': '🇺🇸 🚺 Kore',
96
+ 'af_sarah': '🇺🇸 🚺 Sarah',
97
+ 'af_nova': '🇺🇸 🚺 Nova',
98
+ 'af_sky': '🇺🇸 🚺 Sky',
99
+ 'af_alloy': '🇺🇸 🚺 Alloy',
100
+ 'af_jessica': '🇺🇸 🚺 Jessica',
101
+ 'af_river': '🇺🇸 🚺 River',
102
+
103
+ 'am_michael': '🇺🇸 🚹 Michael',
104
+ 'am_fenrir': '🇺🇸 🚹 Fenrir',
105
+ 'am_puck': '🇺🇸 🚹 Puck',
106
+ 'am_echo': '🇺🇸 🚹 Echo',
107
+ 'am_eric': '🇺🇸 🚹 Eric',
108
+ 'am_liam': '🇺🇸 🚹 Liam',
109
+ 'am_onyx': '🇺🇸 🚹 Onyx',
110
+ 'am_santa': '🇺🇸 🚹 Santa',
111
+ 'am_adam': '🇺🇸 🚹 Adam',
112
+
113
+ 'bf_emma': '🇬🇧 🚺 Emma',
114
+ 'bf_isabella': '🇬🇧 🚺 Isabella',
115
+ 'bf_alice': '🇬🇧 🚺 Alice',
116
+ 'bf_lily': '🇬🇧 🚺 Lily',
117
+
118
+ 'bm_george': '🇬🇧 🚹 George',
119
+ 'bm_fable': '🇬🇧 🚹 Fable',
120
+ 'bm_lewis': '🇬🇧 🚹 Lewis',
121
+ 'bm_daniel': '🇬🇧 🚹 Daniel',
122
+
123
+ 'ef_dora': '🇪🇸 🚺 Dora',
124
+
125
+ 'em_alex': '🇪🇸 🚹 Alex',
126
+ 'em_santa': '🇪🇸 🚹 Santa',
127
+
128
+ 'ff_siwis': '🇫🇷 🚺 Siwis',
129
+
130
+ 'hf_alpha': '🇮🇳 🚺 Alpha',
131
+ 'hf_beta': '🇮🇳 🚺 Beta',
132
+
133
+ 'hm_omega': '🇮🇳 🚹 Omega',
134
+ 'hm_psi': '🇮🇳 🚹 Psi',
135
+
136
+ 'if_sara': '🇮🇹 🚺 Sara',
137
+
138
+ 'im_nicola': '🇮🇹 🚹 Nicola',
139
+
140
+ 'jf_alpha': '🇯🇵 🚺 Alpha',
141
+ 'jf_gongitsune': '🇯🇵 🚺 Gongitsune',
142
+ 'jf_nezumi': '🇯🇵 🚺 Nezumi',
143
+ 'jf_tebukuro': '🇯🇵 🚺 Tebukuro',
144
+
145
+ 'jm_kumo': '🇯🇵 🚹 Kumo',
146
+
147
+ 'pf_dora': '🇧🇷 🚺 Dora',
148
+
149
+ 'pm_alex': '🇧🇷 🚹 Alex',
150
+ 'pm_santa': '🇧🇷 🚹 Santa',
151
+
152
+ 'zf_xiaobei': '🇨🇳 🚺 Xiaobei',
153
+ 'zf_xiaoni': '🇨🇳 🚺 Xiaoni',
154
+ 'zf_xiaoxiao': '🇨🇳 🚺 Xiaoxiao',
155
+ 'zf_xiaoyi': '🇨🇳 🚺 Xiaoyi',
156
+
157
+ 'zm_yunjian': '🇨🇳 🚹 Yunjian',
158
+ 'zm_yunxi': '🇨🇳 🚹 Yunxi',
159
+ 'zm_yunxia': '🇨🇳 🚹 Yunxia',
160
+ 'zm_yunyang': '🇨🇳 🚹 Yunyang'
161
+ }
162
+
163
  # Gradio interface setup
164
  with gr.Blocks() as app:
165
  gr.Markdown("### Kokoro Text-to-Speech with Translation")