Pendrokar committed on
Commit
fbe6a51
·
1 Parent(s): 3dcc608

ZeroGPU XTTS

Browse files
Files changed (2) hide show
  1. app/models.py +26 -12
  2. test_tts_xtts.py +18 -11
app/models.py CHANGED
@@ -26,6 +26,7 @@ AVAILABLE_MODELS = {
26
  # '<keyname>':'<Space URL>'
27
  # gradio version that works with most spaces: 4.29
28
  # 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
 
29
  # 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
30
  #'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
31
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
@@ -109,13 +110,21 @@ AVAILABLE_MODELS = {
109
 
110
  HF_SPACES = {
111
  # XTTS v2
 
 
 
 
 
 
 
 
 
112
  'coqui/xtts': {
113
  'name': 'XTTS v2',
114
- 'function': '1',
115
- 'text_param_index': 0,
116
- 'return_audio_index': 1,
117
  'series': 'XTTS',
118
- 'emoji': '😩', # old gradio
119
  },
120
 
121
  # WhisperSpeech
@@ -238,7 +247,8 @@ HF_SPACES = {
238
  'return_audio_index': 0,
239
  'is_closed_source': True,
240
  'series': 'Edge TTS',
241
- 'emoji': '😑', # api disabled
 
242
  },
243
 
244
  # Fish Speech
@@ -468,13 +478,17 @@ DEFAULT_VOICE_PROMPT = "female voice; very clear audio"
468
 
469
  # Older gradio spaces use unnamed parameters, both types are valid
470
  OVERRIDE_INPUTS = {
 
 
 
 
 
 
 
 
 
471
  'coqui/xtts': {
472
- 1: 'en',
473
- 2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
474
- 3: None, # mic voice sample
475
- 4: False, #use_mic
476
- 5: False, #cleanup_reference
477
- 6: False, #auto_detect
478
  },
479
  'collabora/WhisperSpeech': {
480
  1: DEFAULT_VOICE_SAMPLE, # voice sample
@@ -866,7 +880,7 @@ def make_link_to_space(model_name, for_leaderboard=False):
866
  emoji = HF_SPACES[model_name]['emoji']
867
  except:
868
  pass
869
- return emoji +' <a target="_blank" style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_basename +'</a>'
870
 
871
  # otherwise just return without emoji
872
  return '<span style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_name +'</span>'
 
26
  # '<keyname>':'<Space URL>'
27
  # gradio version that works with most spaces: 4.29
28
  # 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
29
+ 'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
30
  # 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
31
  #'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
32
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
 
110
 
111
  HF_SPACES = {
112
  # XTTS v2
113
+ # 'coqui/xtts': {
114
+ # 'name': 'XTTS v2',
115
+ # 'function': '1',
116
+ # 'text_param_index': 0,
117
+ # 'return_audio_index': 1,
118
+ # 'series': 'XTTS',
119
+ # 'emoji': '😩', # old gradio
120
+ # },
121
+ # tonyassi ZeroGPU XTTS v2
122
  'coqui/xtts': {
123
  'name': 'XTTS v2',
124
+ 'function': '/predict',
125
+ 'text_param_index': 'text',
126
+ 'return_audio_index': 0,
127
  'series': 'XTTS',
 
128
  },
129
 
130
  # WhisperSpeech
 
247
  'return_audio_index': 0,
248
  'is_closed_source': True,
249
  'series': 'Edge TTS',
250
+ 'emoji': '', # api disabled
251
+ 'space_link': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
252
  },
253
 
254
  # Fish Speech
 
478
 
479
  # Older gradio spaces use unnamed parameters, both types are valid
480
  OVERRIDE_INPUTS = {
481
+ # 'coqui/xtts': {
482
+ # 1: 'en',
483
+ # 2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
484
+ # 3: None, # mic voice sample
485
+ # 4: False, #use_mic
486
+ # 5: False, #cleanup_reference
487
+ # 6: False, #auto_detect
488
+ # },
489
+ # tonyassi ZeroGPU space of XTTS:
490
  'coqui/xtts': {
491
+ 'audio': DEFAULT_VOICE_SAMPLE, # voice sample
 
 
 
 
 
492
  },
493
  'collabora/WhisperSpeech': {
494
  1: DEFAULT_VOICE_SAMPLE, # voice sample
 
880
  emoji = HF_SPACES[model_name]['emoji']
881
  except:
882
  pass
883
+ return (emoji +' <a target="_blank" style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_basename +'</a>').strip()
884
 
885
  # otherwise just return without emoji
886
  return '<span style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_name +'</span>'
test_tts_xtts.py CHANGED
@@ -1,17 +1,24 @@
1
  import os
2
- from gradio_client import Client, file
3
 
4
- client = Client("coqui/xtts", hf_token=os.getenv('HF_TOKEN'))
 
5
  endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
6
  # print(endpoints)
 
 
 
 
 
 
 
 
 
 
 
 
7
  result = client.predict(
8
- "Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
9
- 'en', #lang
10
- 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample
11
- None, # mic voice sample
12
- False, #use_mic
13
- False, #cleanup_reference
14
- False, #auto_detect
15
- True, #ToS
16
- fn_index=1
17
  )
 
1
  import os
2
+ from gradio_client import Client, handle_file
3
 
4
+ # client = Client("coqui/xtts", hf_token=os.getenv('HF_TOKEN'), headers={})
5
+ client = Client("tonyassi/voice-clone", hf_token=os.getenv('HF_TOKEN'), headers={})
6
  endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
7
  # print(endpoints)
8
+ # result = client.predict(
9
+ # "Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
10
+ # 'en', #lang
11
+ # 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample
12
+ # None, # mic voice sample
13
+ # False, #use_mic
14
+ # False, #cleanup_reference
15
+ # False, #auto_detect
16
+ # True, #ToS
17
+ # fn_index=1
18
+ # )
19
+ # tony's space
20
  result = client.predict(
21
+ text="Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
22
+ audio=handle_file('https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'), # voice sample
23
+ api_name="/predict"
 
 
 
 
 
 
24
  )