Spaces:
Running
on
Zero
Running
on
Zero
ZeroGPU XTTS
Browse files- app/models.py +26 -12
- test_tts_xtts.py +18 -11
app/models.py
CHANGED
@@ -26,6 +26,7 @@ AVAILABLE_MODELS = {
|
|
26 |
# '<keyname>':'<Space URL>'
|
27 |
# gradio version that works with most spaces: 4.29
|
28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
|
|
29 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
30 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
31 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
@@ -109,13 +110,21 @@ AVAILABLE_MODELS = {
|
|
109 |
|
110 |
HF_SPACES = {
|
111 |
# XTTS v2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
'coqui/xtts': {
|
113 |
'name': 'XTTS v2',
|
114 |
-
'function': '1',
|
115 |
-
'text_param_index': 0,
|
116 |
-
'return_audio_index': 1,
|
117 |
'series': 'XTTS',
|
118 |
-
'emoji': '😩', # old gradio
|
119 |
},
|
120 |
|
121 |
# WhisperSpeech
|
@@ -238,7 +247,8 @@ HF_SPACES = {
|
|
238 |
'return_audio_index': 0,
|
239 |
'is_closed_source': True,
|
240 |
'series': 'Edge TTS',
|
241 |
-
'emoji': '
|
|
|
242 |
},
|
243 |
|
244 |
# Fish Speech
|
@@ -468,13 +478,17 @@ DEFAULT_VOICE_PROMPT = "female voice; very clear audio"
|
|
468 |
|
469 |
# Older gradio spaces use unnamed parameters, both types are valid
|
470 |
OVERRIDE_INPUTS = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
'coqui/xtts': {
|
472 |
-
1: 'en',
|
473 |
-
2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
474 |
-
3: None, # mic voice sample
|
475 |
-
4: False, #use_mic
|
476 |
-
5: False, #cleanup_reference
|
477 |
-
6: False, #auto_detect
|
478 |
},
|
479 |
'collabora/WhisperSpeech': {
|
480 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
@@ -866,7 +880,7 @@ def make_link_to_space(model_name, for_leaderboard=False):
|
|
866 |
emoji = HF_SPACES[model_name]['emoji']
|
867 |
except:
|
868 |
pass
|
869 |
-
return emoji +' <a target="_blank" style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_basename +'</a>'
|
870 |
|
871 |
# otherwise just return without emoji
|
872 |
return '<span style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_name +'</span>'
|
|
|
26 |
# '<keyname>':'<Space URL>'
|
27 |
# gradio version that works with most spaces: 4.29
|
28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
29 |
+
'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
|
110 |
|
111 |
HF_SPACES = {
|
112 |
# XTTS v2
|
113 |
+
# 'coqui/xtts': {
|
114 |
+
# 'name': 'XTTS v2',
|
115 |
+
# 'function': '1',
|
116 |
+
# 'text_param_index': 0,
|
117 |
+
# 'return_audio_index': 1,
|
118 |
+
# 'series': 'XTTS',
|
119 |
+
# 'emoji': '😩', # old gradio
|
120 |
+
# },
|
121 |
+
# tonyassi ZeroGPU XTTS v2
|
122 |
'coqui/xtts': {
|
123 |
'name': 'XTTS v2',
|
124 |
+
'function': '/predict',
|
125 |
+
'text_param_index': 'text',
|
126 |
+
'return_audio_index': 0,
|
127 |
'series': 'XTTS',
|
|
|
128 |
},
|
129 |
|
130 |
# WhisperSpeech
|
|
|
247 |
'return_audio_index': 0,
|
248 |
'is_closed_source': True,
|
249 |
'series': 'Edge TTS',
|
250 |
+
'emoji': '', # api disabled
|
251 |
+
'space_link': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
252 |
},
|
253 |
|
254 |
# Fish Speech
|
|
|
478 |
|
479 |
# Older gradio spaces use unnamed parameters, both types are valid
|
480 |
OVERRIDE_INPUTS = {
|
481 |
+
# 'coqui/xtts': {
|
482 |
+
# 1: 'en',
|
483 |
+
# 2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
484 |
+
# 3: None, # mic voice sample
|
485 |
+
# 4: False, #use_mic
|
486 |
+
# 5: False, #cleanup_reference
|
487 |
+
# 6: False, #auto_detect
|
488 |
+
# },
|
489 |
+
# tonyassi ZeroGPU space of XTTS:
|
490 |
'coqui/xtts': {
|
491 |
+
'audio': DEFAULT_VOICE_SAMPLE, # voice sample
|
|
|
|
|
|
|
|
|
|
|
492 |
},
|
493 |
'collabora/WhisperSpeech': {
|
494 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
|
|
880 |
emoji = HF_SPACES[model_name]['emoji']
|
881 |
except:
|
882 |
pass
|
883 |
+
return (emoji +' <a target="_blank" style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_basename +'</a>').strip()
|
884 |
|
885 |
# otherwise just return without emoji
|
886 |
return '<span style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_name +'</span>'
|
test_tts_xtts.py
CHANGED
@@ -1,17 +1,24 @@
|
|
1 |
import os
|
2 |
-
from gradio_client import Client,
|
3 |
|
4 |
-
client = Client("coqui/xtts", hf_token=os.getenv('HF_TOKEN'))
|
|
|
5 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
6 |
# print(endpoints)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
result = client.predict(
|
8 |
-
"Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
|
9 |
-
'en', #lang
|
10 |
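-
'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample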
|
11 |
-
None, # mic voice sample
|
12 |
-
False, #use_mic
|
13 |
-
False, #cleanup_reference
|
14 |
-
False, #auto_detect
|
15 |
-
True, #ToS
|
16 |
-
fn_index=1
|
17 |
)
|
|
|
1 |
import os
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
|
4 |
+
# client = Client("coqui/xtts", hf_token=os.getenv('HF_TOKEN'), headers={})
|
5 |
+
client = Client("tonyassi/voice-clone", hf_token=os.getenv('HF_TOKEN'), headers={})
|
6 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
7 |
# print(endpoints)
|
8 |
+
# result = client.predict(
|
9 |
+
# "Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
|
10 |
+
# 'en', #lang
|
11 |
+
# 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample
|
12 |
+
# None, # mic voice sample
|
13 |
+
# False, #use_mic
|
14 |
+
# False, #cleanup_reference
|
15 |
+
# False, #auto_detect
|
16 |
+
# True, #ToS
|
17 |
+
# fn_index=1
|
18 |
+
# )
|
19 |
+
# tony's space
|
20 |
result = client.predict(
|
21 |
+
text="Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
|
22 |
+
audio=handle_file('https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'), # voice sample
|
23 |
+
api_name="/predict"
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
)
|