Pendrokar committed on
Commit
f3f028c
·
1 Parent(s): 97d05f0

IndexTTS return audio index fix

Browse files
Files changed (2) hide show
  1. app/models.py +16 -13
  2. app/synth.py +4 -4
app/models.py CHANGED
@@ -26,7 +26,7 @@ AVAILABLE_MODELS = {
26
  # '<keyname>':'<Space URL>'
27
  # gradio version that works with most spaces: 4.29
28
  # 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
29
- 'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
30
  # 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
31
  #'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
32
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
@@ -35,11 +35,11 @@ AVAILABLE_MODELS = {
35
  # 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
36
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
37
  # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
38
- 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
39
 
40
  # E2 & F5 TTS
41
  # F5 model
42
- 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
43
  # E2 model
44
  # 'mrfakename/E2-F5-TTS/E2': 'mrfakename/E2-F5-TTS', # seems to require multiple requests for setup
45
 
@@ -47,13 +47,13 @@ AVAILABLE_MODELS = {
47
  # Parler Large model
48
  # 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
49
  # Parler Mini model
50
- 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
51
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
52
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
53
 
54
  # # Microsoft Edge TTS
55
  # 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
56
- 'innoai/Edge-TTS-Text-to-Speech': '/Edge-TTS', # using Edge API
57
 
58
  # IMS-Toucan
59
  # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
@@ -66,7 +66,7 @@ AVAILABLE_MODELS = {
66
  # StyleTTS Kokoro v0.23
67
  # 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
68
  # StyleTTS Kokoro v1.0
69
- 'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
70
 
71
  # MaskGCT (by Amphion)
72
  # 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
@@ -92,14 +92,14 @@ AVAILABLE_MODELS = {
92
  # 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
93
 
94
  # Mars6
95
- 'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
96
 
97
  # Zonos
98
  # 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
99
- 'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
100
 
101
  # Spark
102
- 'thunnai/SparkTTS': 'thunnai/SparkTTS',
103
 
104
  # Sesame
105
  'sesame/csm-1b' : 'sesame/csm-1b',
@@ -253,7 +253,7 @@ HF_SPACES = {
253
 
254
  # Microsoft Edge TTS
255
  'innoai/Edge-TTS-Text-to-Speech': {
256
- 'name': 'Microsoft Edge TTS',
257
  'function': '/predict',
258
  'text_param_index': 0,
259
  'return_audio_index': 0,
@@ -891,7 +891,7 @@ closed_source = [
891
  top_five = ['IndexTeam/IndexTTS']
892
 
893
  # prioritize low vote models
894
- sql = 'SELECT name FROM model WHERE (upvote + downvote) < 700 ORDER BY (upvote + downvote) ASC'
895
  conn = get_db()
896
  cursor = conn.cursor()
897
  cursor.execute(sql)
@@ -899,10 +899,13 @@ data = cursor.fetchall()
899
  for model in data:
900
  if (
901
  len(top_five) >= 5
902
- or model[0] in top_five
903
- or model[0] not in AVAILABLE_MODELS.keys()
904
  ):
905
  break
 
 
 
 
 
906
 
907
  top_five.append(model[0])
908
  print(f"low vote top_five: {top_five}")
 
26
  # '<keyname>':'<Space URL>'
27
  # gradio version that works with most spaces: 4.29
28
  # 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
29
+ # 'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
30
  # 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
31
  #'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
32
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
 
35
  # 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
36
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
37
  # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
38
+ # 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
39
 
40
  # E2 & F5 TTS
41
  # F5 model
42
+ # 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
43
  # E2 model
44
  # 'mrfakename/E2-F5-TTS/E2': 'mrfakename/E2-F5-TTS', # seems to require multiple requests for setup
45
 
 
47
  # Parler Large model
48
  # 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
49
  # Parler Mini model
50
+ # 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
51
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
52
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
53
 
54
  # # Microsoft Edge TTS
55
  # 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
56
+ # 'innoai/Edge-TTS-Text-to-Speech': '/Edge-TTS', # using Edge API
57
 
58
  # IMS-Toucan
59
  # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
 
66
  # StyleTTS Kokoro v0.23
67
  # 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
68
  # StyleTTS Kokoro v1.0
69
+ # 'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
70
 
71
  # MaskGCT (by Amphion)
72
  # 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
 
92
  # 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
93
 
94
  # Mars6
95
+ # 'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
96
 
97
  # Zonos
98
  # 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
99
+ # 'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
100
 
101
  # Spark
102
+ # 'thunnai/SparkTTS': 'thunnai/SparkTTS',
103
 
104
  # Sesame
105
  'sesame/csm-1b' : 'sesame/csm-1b',
 
253
 
254
  # Microsoft Edge TTS
255
  'innoai/Edge-TTS-Text-to-Speech': {
256
+ 'name': 'Microsoft® Edge TTS',
257
  'function': '/predict',
258
  'text_param_index': 0,
259
  'return_audio_index': 0,
 
891
  top_five = ['IndexTeam/IndexTTS']
892
 
893
  # prioritize low vote models
894
+ sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
895
  conn = get_db()
896
  cursor = conn.cursor()
897
  cursor.execute(sql)
 
899
  for model in data:
900
  if (
901
  len(top_five) >= 5
 
 
902
  ):
903
  break
904
+ if (
905
+ model[0] in top_five
906
+ or model[0] not in AVAILABLE_MODELS.keys()
907
+ ):
908
+ continue
909
 
910
  top_five.append(model[0])
911
  print(f"low vote top_five: {top_five}")
app/synth.py CHANGED
@@ -147,12 +147,12 @@ def synthandreturn(text, autoplay, request: gr.Request):
147
 
148
  # return path to audio
149
  result = results
150
- if (not isinstance(results, str)):
151
- # return_audio_index may be a filepath string
152
- result = results[return_audio_index]
153
  if (isinstance(result, dict)):
154
  # return_audio_index is a dictionary
155
- result = results[return_audio_index]['value']
 
 
 
156
  else:
157
  # Use the private HF Space
158
  result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
 
147
 
148
  # return path to audio
149
  result = results
 
 
 
150
  if (isinstance(result, dict)):
151
  # return_audio_index is a dictionary
152
+ result = results['value']
153
+ elif (not isinstance(result, str)):
154
+ # return_audio_index may be a filepath string
155
+ result = results[return_audio_index]
156
  else:
157
  # Use the private HF Space
158
  result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")