Upload app.py

app.py CHANGED
@@ -10,6 +10,7 @@ import pypdf
 import random
 import re
 import spaces
+import subprocess
 import torch
 import yaml
 
@@ -43,6 +44,13 @@ def get_random_text(voice):
     lang = 'en'
     return random.choice(random_texts[lang])
 
+sents = set()
+for txt in {'harvard_sentences', 'llama3_command-r_sentences_1st_person', 'llama3_command-r_sentences_excla', 'llama3_command-r_questions'}:
+    txt += '.txt'
+    subprocess.run(['wget', f'https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena/resolve/main/{txt}'])
+    with open('harvard_sentences.txt') as f:
+        sents.update(f.read().strip().splitlines())
+
 def parens_to_angles(s):
     return s.replace('(', '«').replace(')', '»')
 
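The added block shells out to `wget` four times and accumulates the Arena prompt lists into a single `sents` set. For comparison, the same fetch can be done with the standard library alone; the following is only a sketch, not the committed code, and it assumes the same four files and that `sents` is used purely as an in-memory set of known prompts:

# Illustrative alternative to the wget subprocess above (not this Space's code):
# fetch the same Arena sentence lists with urllib, so no external binary is needed.
import urllib.request

ARENA_BASE = 'https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena/resolve/main'
FILES = ['harvard_sentences', 'llama3_command-r_sentences_1st_person',
         'llama3_command-r_sentences_excla', 'llama3_command-r_questions']

sents = set()
for name in FILES:
    with urllib.request.urlopen(f'{ARENA_BASE}/{name}.txt') as resp:
        # one sentence per line; merge every list into a single set
        sents.update(resp.read().decode('utf-8').strip().splitlines())

Unlike the committed loop, which opens `harvard_sentences.txt` on every pass, this sketch reads whichever file it just fetched.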
@@ -232,8 +240,9 @@ def clamp_speed(speed):
         return 2
     return speed
 
+sk = gr.State()
 # Must be backwards compatible with https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena
-def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto', sk=None):
+def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto'):
     voices = resolve_voices(voice, warn=ps)
     ps = ps or phonemize(text, voice)
     speed = clamp_speed(speed)
@@ -246,6 +255,7 @@ def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto', sk=None):
     tokens = tokens[:510]
     ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
     use_gpu = len(ps) > 99 if use_gpu == 'auto' else use_gpu
+    global sk
     print('🔥', datetime.now(), text, voices, ps, use_gpu, sk)
     try:
         if use_gpu:
@@ -321,10 +331,8 @@ with gr.Blocks() as basic_tts:
                 btn = gr.Button(list(CHOICES.values())[i*4+j], variant='primary' if i*4+j < 10 else 'secondary')
                 btn.click(lambda v, b: f'{v}+{b}' if v.startswith(b[:2]) else b, inputs=[voice, btn], outputs=[voice])
                 voice.change(lambda v, b: gr.Button(b, variant='primary' if v.startswith(b[:2]) else 'secondary'), inputs=[voice, btn], outputs=[btn])
-    sk = gr.State()
     text.submit(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
     generate_btn.click(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
-    basic_tts.load(lambda r: r.session_hash, None, sk)
 
 @torch.no_grad()
 def lf_forward(token_lists, voices, speed, device='cpu'):
@@ -500,7 +508,7 @@ with gr.Blocks() as lf_tts:
 
 with gr.Blocks() as about:
     gr.Markdown('''
-Kokoro is a frontier TTS model for its size. It has [80 million](https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#
+Kokoro is a frontier TTS model for its size. It has [80 million](https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L33) parameters, uses a lean [StyleTTS 2](https://github.com/yl4579/StyleTTS2) architecture, and was trained on high-quality data. The weights are currently private, but a free public demo is hosted here, at `https://hf.co/spaces/hexgrad/Kokoro-TTS`. The Community tab is open for feature requests, bug reports, etc. For other inquiries, contact `@rzvzn` on Discord.
 
 ### FAQ
 **Will this be open sourced?**<br/>
@@ -582,6 +590,7 @@ with gr.Blocks() as app:
         [basic_tts, lf_tts, about, changelog],
         ['🔥 Basic TTS', '📖 Long Form', 'ℹ️ About', '📝 Changelog'],
     )
+    app.load(lambda r: r.session_hash, None, sk)
 
 if __name__ == '__main__':
     app.queue(api_open=True).launch()
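For context on the `sk` rewiring in this diff: the state created inside the Basic TTS tab and the `sk=None` keyword argument are replaced by a module-level `gr.State` that `app.load` fills with the Gradio session hash on page load, while `generate` now references the component via `global sk`. The following is a minimal, self-contained sketch of that Gradio pattern; the `get_session_key` and `greet` names are illustrative and not part of this Space:

import gradio as gr

def get_session_key(request: gr.Request):
    # Gradio injects the Request when a parameter is annotated with gr.Request;
    # session_hash uniquely identifies the browser session.
    return request.session_hash

def greet(text, session_key):
    # the handler receives the per-session key like any other input value
    return f'{text} (session {session_key})'

with gr.Blocks() as demo:
    sk = gr.State()                       # holds the session key for each user
    text = gr.Textbox(label='Text')
    out = gr.Textbox(label='Output')
    text.submit(greet, inputs=[text, sk], outputs=[out])
    demo.load(get_session_key, None, sk)  # runs once per page load and fills sk

if __name__ == '__main__':
    demo.launch()

In Gradio, the per-session value reaches a handler only through `inputs`; the module-level component object identifies which state slot to read.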