Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -334,93 +334,16 @@ ML_LANGUAGES = {
|
|
| 334 |
'🇨🇳 zh-CN': 'z',
|
| 335 |
}
|
| 336 |
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
'🇺🇸 🚺 Sarah ⭐': 'af_sarah',
|
| 343 |
-
'🇺🇸 🚺 Alloy': 'af_alloy',
|
| 344 |
-
'🇺🇸 🚺 Jessica 🧪': 'af_jessica',
|
| 345 |
-
'🇺🇸 🚺 Matilda 🧪': 'af_matilda',
|
| 346 |
-
'🇺🇸 🚺 Nova': 'af_nova',
|
| 347 |
-
'🇺🇸 🚺 River': 'af_river',
|
| 348 |
-
'🇺🇸 🚺 Sky': 'af_sky',
|
| 349 |
-
'🇺🇸 🚹 Adam ⭐': 'am_adam',
|
| 350 |
-
'🇺🇸 🚹 Michael ⭐': 'am_michael',
|
| 351 |
-
'🇺🇸 🚹 Echo': 'am_echo',
|
| 352 |
-
'🇺🇸 🚹 Eric': 'am_eric',
|
| 353 |
-
'🇺🇸 🚹 Liam': 'am_liam',
|
| 354 |
-
'🇺🇸 🚹 Onyx': 'am_onyx',
|
| 355 |
-
'🇺🇸 🚹 Will 🧪': 'am_will',
|
| 356 |
-
},
|
| 357 |
-
b={
|
| 358 |
-
'🇬🇧 🚺 Alice': 'bf_alice',
|
| 359 |
-
'🇬🇧 🚺 Lily': 'bf_lily',
|
| 360 |
-
'🇬🇧 🚹 Lewis ⭐': 'bm_lewis',
|
| 361 |
-
'🇬🇧 🚹 Daniel': 'bm_daniel',
|
| 362 |
-
'🇬🇧 🚹 Fable': 'bm_fable',
|
| 363 |
-
'🇬🇧 🚹 George': 'bm_george',
|
| 364 |
-
},
|
| 365 |
-
f={'🇫🇷 🚺 French Alpha': 'fr_alpha'},
|
| 366 |
-
j={
|
| 367 |
-
'🇯🇵 🚺 Japanese Alpha ⭐': 'jf_alpha',
|
| 368 |
-
'🇯🇵 🚺 Japanese Beta': 'jf_theta',
|
| 369 |
-
'🇯🇵 🚺 Japanese Gamma': 'jf_iota',
|
| 370 |
-
'🇯🇵 🚺 Japanese Delta': 'jf_kappa',
|
| 371 |
-
'🇯🇵 🚺 Japanese Epsilon': 'jf_beta_0',
|
| 372 |
-
'🇯🇵 🚺 Japanese Zeta': 'jf_gamma_0',
|
| 373 |
-
'🇯🇵 🚺 Japanese Eta': 'jf_delta_0',
|
| 374 |
-
'🇯🇵 🚺 Japanese Theta': 'jf_epsilon',
|
| 375 |
-
'🇯🇵 🚺 Japanese Iota': 'jf_zeta',
|
| 376 |
-
'🇯🇵 🚺 Japanese Kappa': 'jf_eta',
|
| 377 |
-
'🇯🇵 🚹 Japanese Omega': 'jm_omega',
|
| 378 |
-
},
|
| 379 |
-
k={
|
| 380 |
-
'🇰🇷 🚺 Korean Alpha': 'kf_alpha',
|
| 381 |
-
'🇰🇷 🚺 Korean Beta': 'kf_beta',
|
| 382 |
-
'🇰🇷 🚺 Korean Gamma': 'kf_gamma',
|
| 383 |
-
'🇰🇷 🚺 Korean Delta': 'kf_delta',
|
| 384 |
-
'🇰🇷 🚺 Korean Epsilon': 'kf_epsilon',
|
| 385 |
-
'🇰🇷 🚺 Korean Zeta': 'kf_zeta',
|
| 386 |
-
'🇰🇷 🚺 Korean Eta': 'kf_eta',
|
| 387 |
-
'🇰🇷 🚺 Korean Theta': 'kf_theta',
|
| 388 |
-
'🇰🇷 🚺 Korean Iota': 'kf_iota',
|
| 389 |
-
'🇰🇷 🚺 Korean Kappa': 'kf_kappa',
|
| 390 |
-
'🇰🇷 🚺 Korean Lambda': 'kf_lambda',
|
| 391 |
-
'🇰🇷 🚺 Korean Mu': 'kf_mu',
|
| 392 |
-
'🇰🇷 🚺 Korean Nu': 'kf_nu',
|
| 393 |
-
'🇰🇷 🚺 Korean Xi': 'kf_xi',
|
| 394 |
-
'🇰🇷 🚺 Korean Omicron': 'kf_omicron',
|
| 395 |
-
'🇰🇷 🚹 Korean Pi': 'km_pi',
|
| 396 |
-
'🇰🇷 🚹 Korean Rho': 'km_rho',
|
| 397 |
-
'🇰🇷 🚹 Korean Sigma': 'km_sigma',
|
| 398 |
-
'🇰🇷 🚹 Korean Tau': 'km_tau',
|
| 399 |
-
'🇰🇷 🚹 Korean Upsilon': 'km_upsilon',
|
| 400 |
-
'🇰🇷 🚹 Korean Phi': 'km_phi',
|
| 401 |
-
'🇰🇷 🚹 Korean Chi': 'km_chi',
|
| 402 |
-
'🇰🇷 🚹 Korean Psi': 'km_psi',
|
| 403 |
-
'🇰🇷 🚹 Korean Omega': 'km_omega',
|
| 404 |
-
},
|
| 405 |
-
z={
|
| 406 |
-
'🇨🇳 🚺 Mandarin Alpha': 'zf_beta',
|
| 407 |
-
'🇨🇳 🚺 Mandarin Beta': 'zf_gamma',
|
| 408 |
-
'🇨🇳 🚺 Mandarin Gamma': 'zf_delta',
|
| 409 |
-
'🇨🇳 🚺 Mandarin Delta': 'zf_epsilon',
|
| 410 |
-
'🇨🇳 🚺 Mandarin Epsilon 🧪': 'zf_alpha',
|
| 411 |
-
'🇨🇳 🚹 Mandarin Phi': 'zm_phi',
|
| 412 |
-
'🇨🇳 🚹 Mandarin Chi': 'zm_chi',
|
| 413 |
-
'🇨🇳 🚹 Mandarin Psi': 'zm_psi',
|
| 414 |
-
'🇨🇳 🚹 Mandarin Omega': 'zm_omega',
|
| 415 |
-
},
|
| 416 |
-
)
|
| 417 |
def change_language(value):
    """Rebuild the voice dropdown for the language code that was selected.

    Args:
        value: Key into ``ML_CHOICES`` (e.g. ``'a'`` or ``'b'``); the entry
            is a mapping of display label -> voice id.

    Returns:
        A ``gr.Dropdown`` listing that language's voices, preselecting the
        first voice id.

    Raises:
        KeyError: if ``value`` is not a key of ``ML_CHOICES``.
        IndexError: if the selected language has no voices.
    """
    voice_pairs = [(label, voice_id) for label, voice_id in ML_CHOICES[value].items()]
    # British English ('b') gets its own notice; every other language shows
    # the stability legend.
    if value == 'b':
        info = 'Missing British voices will be restored later'
    else:
        info = '⭐ voices are stable, 🧪 are unstable'
    first_voice_id = voice_pairs[0][1]
    return gr.Dropdown(voice_pairs, value=first_voice_id, label='Voice', info=info)
|
| 421 |
|
| 422 |
-
from gradio_client import Client
|
| 423 |
-
client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
|
| 424 |
def multilingual(text, voice, speed, trim, sk):
|
| 425 |
if not text.strip():
|
| 426 |
return None
|
|
@@ -431,7 +354,7 @@ def multilingual(text, voice, speed, trim, sk):
|
|
| 431 |
gr.Warning('Input may have been truncated')
|
| 432 |
except Exception as e:
|
| 433 |
print('📡', datetime.now(), text, voice, repr(e))
|
| 434 |
-
gr.Warning('v0.
|
| 435 |
gr.Info('Switching to v0.19')
|
| 436 |
audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
|
| 437 |
return audio
|
|
@@ -442,7 +365,7 @@ with gr.Blocks() as ml_tts:
|
|
| 442 |
with gr.Row():
|
| 443 |
with gr.Column():
|
| 444 |
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
| 445 |
-
voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value=
|
| 446 |
lang.change(fn=change_language, inputs=[lang], outputs=[voice])
|
| 447 |
with gr.Row():
|
| 448 |
random_btn = gr.Button('Random Text', variant='secondary')
|
|
@@ -457,11 +380,11 @@ with gr.Blocks() as ml_tts:
|
|
| 457 |
trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
|
| 458 |
with gr.Row():
|
| 459 |
gr.Markdown('''
|
| 460 |
-
🎉 New! Kokoro v0.
|
| 461 |
|
| 462 |
📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
|
| 463 |
|
| 464 |
-
⚠️
|
| 465 |
|
| 466 |
🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
|
| 467 |
''', container=True)
|
|
@@ -471,76 +394,6 @@ with gr.Blocks() as ml_tts:
|
|
| 471 |
text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
| 472 |
generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
| 473 |
|
| 474 |
-
client_x = Client('hexgrad/kokoro-src-x', hf_token=os.environ['SRC'])
|
| 475 |
-
def preview(text, voice, speed, trim, sk):
    """Generate audio with the v0.22x preview endpoint, falling back to v0.19.

    Args:
        text: Input text; blank or whitespace-only input yields ``None``.
        voice: Voice id forwarded to the backend.
        speed: Speaking speed forwarded to the backend.
        trim: Trim amount forwarded to the backend.
        sk: Session key; must equal ``os.environ['SK']``.

    Returns:
        The audio returned by ``client_x.predict`` or, if that call fails,
        the first element of ``generate(...)``. ``None`` for blank text.

    Raises:
        AssertionError: if ``sk`` does not match ``os.environ['SK']``.
    """
    if not text.strip():
        return None
    # Explicit check instead of an `assert` statement so the key is still
    # validated when Python runs with -O. The exception type and payload are
    # unchanged from the original assert.
    if sk != os.environ['SK']:
        raise AssertionError(('❌', datetime.now(), text, voice, sk))
    try:
        audio, out_ps = client_x.predict(
            text=text,
            voice=voice,
            speed=speed,
            trim=trim,
            use_gpu=True,
            sk=sk,
            api_name='/generate',
        )
        # NOTE(review): 510 looks like the backend's maximum output length,
        # so hitting it exactly suggests truncation — confirm against backend.
        if len(out_ps) == 510:
            gr.Warning('Input may have been truncated')
    except Exception as e:
        # Best-effort boundary: log the failure, tell the user, and serve the
        # request with the local v0.19 generator instead.
        print('📡', datetime.now(), text, voice, repr(e))
        gr.Warning('v0.22x temporarily unavailable')
        gr.Info('Switching to v0.19')
        audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
    return audio
|
| 489 |
-
|
| 490 |
-
def vote(btn):
    """Record a feedback vote by printing it, then thank the user.

    Args:
        btn: Value delivered by the click event of the vote button; it is
            printed unchanged.
    """
    thanks = 'Thanks for the feedback!'
    print(btn)
    gr.Info(thanks)
|
| 493 |
-
|
| 494 |
-
PREVIEW_CHOICES = {
|
| 495 |
-
'🇺🇸 🚺 Heart ❤️': 'af_heart',
|
| 496 |
-
'🇺🇸 🚺 Spirit 🦋': 'af_spirit',
|
| 497 |
-
'🇬🇧 🚺 Soul 🪽': 'bf_soul',
|
| 498 |
-
}
|
| 499 |
-
|
| 500 |
-
with gr.Blocks() as preview_tts:
|
| 501 |
-
with gr.Row():
|
| 502 |
-
gr.Markdown('''
|
| 503 |
-
🧪 Experimental: v0.22x previews a potential change to the default English voice. 🧪
|
| 504 |
-
|
| 505 |
-
☝️ Check out v0.19 and multilingual v0.22 for a lot more voices, languages, and features!
|
| 506 |
-
|
| 507 |
-
📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
|
| 508 |
-
''', container=True)
|
| 509 |
-
with gr.Row():
|
| 510 |
-
with gr.Column():
|
| 511 |
-
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
| 512 |
-
voice = gr.Dropdown(list(PREVIEW_CHOICES.items()), value='af_heart', label='Voice', info='🧪 These voices are experimental')
|
| 513 |
-
with gr.Row():
|
| 514 |
-
random_btn = gr.Button('Random Text', variant='secondary')
|
| 515 |
-
generate_btn = gr.Button('Generate', variant='primary')
|
| 516 |
-
random_btn.click(get_random_text, inputs=[voice], outputs=[text])
|
| 517 |
-
with gr.Column():
|
| 518 |
-
audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
|
| 519 |
-
with gr.Accordion('Audio Settings', open=False):
|
| 520 |
-
autoplay = gr.Checkbox(value=True, label='Autoplay')
|
| 521 |
-
autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
|
| 522 |
-
speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='⚡️ Speed', info='Adjust the speaking speed')
|
| 523 |
-
trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
|
| 524 |
-
with gr.Row():
|
| 525 |
-
with gr.Accordion('Feedback', open=True):
|
| 526 |
-
with gr.Row():
|
| 527 |
-
gr.Markdown('Vote for the voice you like the best among 3 challengers and 1 defender.')
|
| 528 |
-
with gr.Row():
|
| 529 |
-
heart_btn = gr.Button('🇺🇸 🚺 Heart ❤️', variant='secondary')
|
| 530 |
-
heart_btn.click(vote, inputs=[heart_btn])
|
| 531 |
-
soul_btn = gr.Button('🇺🇸 🚺 Spirit 🦋', variant='secondary')
|
| 532 |
-
soul_btn.click(vote, inputs=[soul_btn])
|
| 533 |
-
with gr.Row():
|
| 534 |
-
spirit_btn = gr.Button('🇬🇧 🚺 Soul 🪽', variant='secondary')
|
| 535 |
-
spirit_btn.click(vote, inputs=[spirit_btn])
|
| 536 |
-
old_btn = gr.Button('🇺🇸 🚺 American Female ⭐', variant='secondary')
|
| 537 |
-
old_btn.click(vote, inputs=[old_btn])
|
| 538 |
-
with gr.Row():
|
| 539 |
-
sk = gr.Textbox(visible=False)
|
| 540 |
-
text.change(lambda: os.environ['SK'], outputs=[sk])
|
| 541 |
-
text.submit(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
| 542 |
-
generate_btn.click(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
| 543 |
-
|
| 544 |
USE_GPU_CHOICES = [('Auto 🔀', 'auto'), ('CPU 💬', False), ('ZeroGPU 📄', True)]
|
| 545 |
USE_GPU_INFOS = {
|
| 546 |
'auto': 'Use CPU or GPU, whichever is faster',
|
|
@@ -831,6 +684,10 @@ This Space and the underlying Kokoro model are both under development and subjec
|
|
| 831 |
'''
|
| 832 |
with gr.Blocks() as changelog:
|
| 833 |
gr.Markdown('''
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
**8 Dec 2024**<br/>
|
| 835 |
🚀 Multilingual v0.22<br/>
|
| 836 |
🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
|
|
@@ -900,8 +757,8 @@ These datasets were **NOT** used to train Kokoro. They may be of interest to aca
|
|
| 900 |
|
| 901 |
with gr.Blocks() as app:
|
| 902 |
gr.TabbedInterface(
|
| 903 |
-
[
|
| 904 |
-
['🔥
|
| 905 |
)
|
| 906 |
|
| 907 |
if __name__ == '__main__':
|
|
|
|
| 334 |
'🇨🇳 zh-CN': 'z',
|
| 335 |
}
|
| 336 |
|
| 337 |
+
import json
from gradio_client import Client

# Client for the Space that serves the multilingual model; the access token
# is read from the SRC environment variable.
client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
# Voice catalogue fetched once at import time from the '/list_voices'
# endpoint: language code -> {display label: voice id}.
ML_CHOICES = json.loads(client.predict(api_name='/list_voices'))
# Initial dropdown selection: the first voice id listed under language 'a'.
DEFAULT_VOICE = list(ML_CHOICES['a'].values())[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
def change_language(value):
    """Rebuild the voice dropdown for the language code that was selected.

    Args:
        value: Key into ``ML_CHOICES`` (e.g. ``'a'`` or ``'b'``); the entry
            is a mapping of display label -> voice id.

    Returns:
        A ``gr.Dropdown`` listing that language's voices, preselecting the
        first voice id.

    Raises:
        KeyError: if ``value`` is not a key of ``ML_CHOICES``.
        IndexError: if the selected language has no voices.
    """
    voice_pairs = [(label, voice_id) for label, voice_id in ML_CHOICES[value].items()]
    # British English ('b') gets its own notice; every other language shows
    # the stability legend.
    if value == 'b':
        info = 'Missing British voices will be restored later'
    else:
        info = '⭐ voices are stable, 🧪 are unstable'
    first_voice_id = voice_pairs[0][1]
    return gr.Dropdown(voice_pairs, value=first_voice_id, label='Voice', info=info)
|
| 346 |
|
|
|
|
|
|
|
| 347 |
def multilingual(text, voice, speed, trim, sk):
|
| 348 |
if not text.strip():
|
| 349 |
return None
|
|
|
|
| 354 |
gr.Warning('Input may have been truncated')
|
| 355 |
except Exception as e:
|
| 356 |
print('📡', datetime.now(), text, voice, repr(e))
|
| 357 |
+
gr.Warning('v0.23 temporarily unavailable')
|
| 358 |
gr.Info('Switching to v0.19')
|
| 359 |
audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
|
| 360 |
return audio
|
|
|
|
| 365 |
with gr.Row():
|
| 366 |
with gr.Column():
|
| 367 |
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
| 368 |
+
voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value=DEFAULT_VOICE, label='Voice', info='⭐ voices are stable, 🧪 are unstable')
|
| 369 |
lang.change(fn=change_language, inputs=[lang], outputs=[voice])
|
| 370 |
with gr.Row():
|
| 371 |
random_btn = gr.Button('Random Text', variant='secondary')
|
|
|
|
| 380 |
trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
|
| 381 |
with gr.Row():
|
| 382 |
gr.Markdown('''
|
| 383 |
+
🎉 New! Kokoro v0.23 now supports 5 languages, including a new default voices. 🎉
|
| 384 |
|
| 385 |
📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
|
| 386 |
|
| 387 |
+
⚠️ v0.23 does not yet support custom pronunciation, Long Form, or Voice Mixer. You can still use these features in v0.19.
|
| 388 |
|
| 389 |
🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
|
| 390 |
''', container=True)
|
|
|
|
| 394 |
text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
| 395 |
generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
| 396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
USE_GPU_CHOICES = [('Auto 🔀', 'auto'), ('CPU 💬', False), ('ZeroGPU 📄', True)]
|
| 398 |
USE_GPU_INFOS = {
|
| 399 |
'auto': 'Use CPU or GPU, whichever is faster',
|
|
|
|
| 684 |
'''
|
| 685 |
with gr.Blocks() as changelog:
|
| 686 |
gr.Markdown('''
|
| 687 |
+
**11 Dec 2024**<br/>
|
| 688 |
+
🚀 Multilingual v0.23<br/>
|
| 689 |
+
🗣️ 85 total voices
|
| 690 |
+
|
| 691 |
**8 Dec 2024**<br/>
|
| 692 |
🚀 Multilingual v0.22<br/>
|
| 693 |
🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
|
|
|
|
| 757 |
|
| 758 |
with gr.Blocks() as app:
|
| 759 |
gr.TabbedInterface(
|
| 760 |
+
[ml_tts, basic_tts, lf_tts, about, data_card, changelog],
|
| 761 |
+
['🔥 Latest v0.23', '🗣️ TTS v0.19', '📖 Long Form v0.19', 'ℹ️ About', '📁 Data', '📝 Changelog'],
|
| 762 |
)
|
| 763 |
|
| 764 |
if __name__ == '__main__':
|