Spaces:
Running
on
Zero
Running
on
Zero
New TTS: Zonos both archs; disabled xVASynth v3
Browse files- app/models.py +57 -2
- test_tts_zonos.py +52 -0
app/models.py
CHANGED
@@ -31,7 +31,7 @@ AVAILABLE_MODELS = {
|
|
31 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
32 |
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
33 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
34 |
-
'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
35 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
36 |
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
37 |
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
@@ -90,6 +90,10 @@ AVAILABLE_MODELS = {
|
|
90 |
# Mars6
|
91 |
'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
92 |
|
|
|
|
|
|
|
|
|
93 |
# HF TTS w issues
|
94 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
95 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
@@ -417,6 +421,24 @@ HF_SPACES = {
|
|
417 |
'is_closed_source': True,
|
418 |
'series': 'MARS',
|
419 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
}
|
421 |
|
422 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
@@ -658,8 +680,41 @@ OVERRIDE_INPUTS = {
|
|
658 |
'quality_prefix': "48000",
|
659 |
'clone_method': "deep-clone",
|
660 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
661 |
}
|
662 |
|
|
|
|
|
|
|
663 |
|
664 |
# Model name mapping, can include models that users cannot vote on
|
665 |
model_names = {
|
@@ -719,7 +774,7 @@ closed_source = [
|
|
719 |
]
|
720 |
|
721 |
# top five models in order to always have one of them picked and scrutinized
|
722 |
-
top_five = []
|
723 |
|
724 |
# prioritize low vote models
|
725 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|
|
|
31 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
32 |
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
33 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
34 |
+
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
35 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
36 |
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
37 |
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
|
|
90 |
# Mars6
|
91 |
'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
92 |
|
93 |
+
# Zonos
|
94 |
+
'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
|
95 |
+
'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
96 |
+
|
97 |
# HF TTS w issues
|
98 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
99 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
|
|
421 |
'is_closed_source': True,
|
422 |
'series': 'MARS',
|
423 |
},
|
424 |
+
|
425 |
+
# Zonos
|
426 |
+
'Steveeeeeeen/Zonos': {
|
427 |
+
'name': 'Zonos T',
|
428 |
+
'function': '/generate_audio',
|
429 |
+
'text_param_index': 'text',
|
430 |
+
'return_audio_index': 0,
|
431 |
+
'is_zero_gpu_space': True,
|
432 |
+
'series': 'Zonos',
|
433 |
+
},
|
434 |
+
'Steveeeeeeen/Zonos/hybrid': {
|
435 |
+
'name': 'Zonos H',
|
436 |
+
'function': '/generate_audio',
|
437 |
+
'text_param_index': 'text',
|
438 |
+
'return_audio_index': 0,
|
439 |
+
'is_zero_gpu_space': True,
|
440 |
+
'series': 'Zonos',
|
441 |
+
},
|
442 |
}
|
443 |
|
444 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
|
|
680 |
'quality_prefix': "48000",
|
681 |
'clone_method': "deep-clone",
|
682 |
},
|
683 |
+
|
684 |
+
# Zonos
|
685 |
+
'Steveeeeeeen/Zonos': {
|
686 |
+
'model_choice':"Zyphra/Zonos-v0.1-transformer",
|
687 |
+
'language': "en-us",
|
688 |
+
'speaker_audio': None, # optional
|
689 |
+
'prefix_audio': handle_file('https://huggingface.co/spaces/Steveeeeeeen/Zonos/resolve/main/assets/silence_100ms.wav'),
|
690 |
+
# 'e1': 1,
|
691 |
+
# 'e2': 0.05,
|
692 |
+
# 'e3': 0.05,
|
693 |
+
# 'e4': 0.05,
|
694 |
+
# 'e5': 0.05,
|
695 |
+
# 'e6': 0.05,
|
696 |
+
# 'e7': 0.1,
|
697 |
+
# 'e8': 0.2,
|
698 |
+
'vq_single': 0.78,
|
699 |
+
'fmax': 24000,
|
700 |
+
'pitch_std': 45,
|
701 |
+
'speaking_rate': 15,
|
702 |
+
'dnsmos_ovrl': 4,
|
703 |
+
'speaker_noised': False,
|
704 |
+
'cfg_scale': 2,
|
705 |
+
'min_p': 0.15,
|
706 |
+
'seed': 420,
|
707 |
+
'randomize_seed': False, # Set to False to easily recreate the state
|
708 |
+
'unconditional_keys': ["emotion"], # makes it ignore e1-e8
|
709 |
+
},
|
710 |
+
# 'Steveeeeeeen/Zonos/hybrid': {
|
711 |
+
# 'model_choice': 'Zyphra/Zonos-v0.1-hybrid',
|
712 |
+
# },
|
713 |
}
|
714 |
|
715 |
+
# minor mods to model from the same space
|
716 |
+
OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid'] = dict(OVERRIDE_INPUTS['Steveeeeeeen/Zonos'])  # shallow copy: assigning the same dict object would make the hybrid 'model_choice' override also replace the transformer entry's value
|
717 |
+
OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid']['model_choice'] = 'Zyphra/Zonos-v0.1-hybrid'
|
718 |
|
719 |
# Model name mapping, can include models that users cannot vote on
|
720 |
model_names = {
|
|
|
774 |
]
|
775 |
|
776 |
# top five models in order to always have one of them picked and scrutinized
|
777 |
+
top_five = ['Steveeeeeeen/Zonos', 'Steveeeeeeen/Zonos/hybrid']
|
778 |
|
779 |
# prioritize low vote models
|
780 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|
test_tts_zonos.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from test_overrides import _get_param_examples, _override_params
|
3 |
+
from gradio_client import Client, file
|
4 |
+
|
5 |
+
model = "Steveeeeeeen/Zonos/hybrid"
|
6 |
+
# client = Client("Pendrokar/Zonos", hf_token=os.getenv('HF_TOKEN'))
|
7 |
+
client = Client("Steveeeeeeen/Zonos", hf_token=os.getenv('HF_TOKEN'))
|
8 |
+
# client = Client(model, hf_token=os.getenv('HF_TOKEN'))
|
9 |
+
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
10 |
+
# print(endpoints)
|
11 |
+
|
12 |
+
api_name = '/generate_audio'
|
13 |
+
fn_index = None
|
14 |
+
end_parameters = None
|
15 |
+
text = 'This is what my voice sounds like.'
|
16 |
+
|
17 |
+
end_parameters = _get_param_examples(
|
18 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
19 |
+
)
|
20 |
+
print(end_parameters)
|
21 |
+
|
22 |
+
|
23 |
+
space_inputs = end_parameters
|
24 |
+
# override some or all default parameters
|
25 |
+
space_inputs = _override_params(end_parameters, model)
|
26 |
+
|
27 |
+
if(type(space_inputs) == dict):
|
28 |
+
space_inputs['text'] = text
|
29 |
+
result = client.predict(
|
30 |
+
**space_inputs,
|
31 |
+
api_name=api_name,
|
32 |
+
fn_index=fn_index
|
33 |
+
)
|
34 |
+
else:
|
35 |
+
space_inputs[0] = text
|
36 |
+
result = client.predict(
|
37 |
+
*space_inputs,
|
38 |
+
api_name=api_name,
|
39 |
+
fn_index=fn_index
|
40 |
+
)
|
41 |
+
# space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
|
42 |
+
|
43 |
+
print(space_inputs)
|
44 |
+
# print(*space_inputs)
|
45 |
+
# print(**space_inputs)
|
46 |
+
|
47 |
+
# result = client.predict(
|
48 |
+
# **space_inputs,
|
49 |
+
# api_name=api_name,
|
50 |
+
# fn_index=fn_index
|
51 |
+
# )
|
52 |
+
print(result)
|