Spaces: Running on Zero
IndexTTS return audio index fix
Browse files
- app/models.py +16 -13
- app/synth.py +4 -4
app/models.py
CHANGED
@@ -26,7 +26,7 @@ AVAILABLE_MODELS = {
|
|
26 |
# '<keyname>':'<Space URL>'
|
27 |
# gradio version that works with most spaces: 4.29
|
28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
29 |
-
'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
@@ -35,11 +35,11 @@ AVAILABLE_MODELS = {
|
|
35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
37 |
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
38 |
-
'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
39 |
|
40 |
# E2 & F5 TTS
|
41 |
# F5 model
|
42 |
-
'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
43 |
# E2 model
|
44 |
# 'mrfakename/E2-F5-TTS/E2': 'mrfakename/E2-F5-TTS', # seems to require multiple requests for setup
|
45 |
|
@@ -47,13 +47,13 @@ AVAILABLE_MODELS = {
|
|
47 |
# Parler Large model
|
48 |
# 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
49 |
# Parler Mini model
|
50 |
-
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
51 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
52 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
53 |
|
54 |
# # Microsoft Edge TTS
|
55 |
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
56 |
-
'innoai/Edge-TTS-Text-to-Speech': '/Edge-TTS', # using Edge API
|
57 |
|
58 |
# IMS-Toucan
|
59 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
@@ -66,7 +66,7 @@ AVAILABLE_MODELS = {
|
|
66 |
# StyleTTS Kokoro v0.23
|
67 |
# 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
|
68 |
# StyleTTS Kokoro v1.0
|
69 |
-
'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
|
70 |
|
71 |
# MaskGCT (by Amphion)
|
72 |
# 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
|
@@ -92,14 +92,14 @@ AVAILABLE_MODELS = {
|
|
92 |
# 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
|
93 |
|
94 |
# Mars6
|
95 |
-
'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
96 |
|
97 |
# Zonos
|
98 |
# 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
|
99 |
-
'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
100 |
|
101 |
# Spark
|
102 |
-
'thunnai/SparkTTS': 'thunnai/SparkTTS',
|
103 |
|
104 |
# Sesame
|
105 |
'sesame/csm-1b' : 'sesame/csm-1b',
|
@@ -253,7 +253,7 @@ HF_SPACES = {
|
|
253 |
|
254 |
# Microsoft Edge TTS
|
255 |
'innoai/Edge-TTS-Text-to-Speech': {
|
256 |
-
'name': 'Microsoft
|
257 |
'function': '/predict',
|
258 |
'text_param_index': 0,
|
259 |
'return_audio_index': 0,
|
@@ -891,7 +891,7 @@ closed_source = [
|
|
891 |
top_five = ['IndexTeam/IndexTTS']
|
892 |
|
893 |
# prioritize low vote models
|
894 |
-
sql = 'SELECT name FROM model WHERE (upvote + downvote) <
|
895 |
conn = get_db()
|
896 |
cursor = conn.cursor()
|
897 |
cursor.execute(sql)
|
@@ -899,10 +899,13 @@ data = cursor.fetchall()
|
|
899 |
for model in data:
|
900 |
if (
|
901 |
len(top_five) >= 5
|
902 |
-
or model[0] in top_five
|
903 |
-
or model[0] not in AVAILABLE_MODELS.keys()
|
904 |
):
|
905 |
break
|
|
|
|
|
|
|
|
|
|
|
906 |
|
907 |
top_five.append(model[0])
|
908 |
print(f"low vote top_five: {top_five}")
|
|
|
26 |
# '<keyname>':'<Space URL>'
|
27 |
# gradio version that works with most spaces: 4.29
|
28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
29 |
+
# 'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
|
35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
37 |
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
38 |
+
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
39 |
|
40 |
# E2 & F5 TTS
|
41 |
# F5 model
|
42 |
+
# 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
43 |
# E2 model
|
44 |
# 'mrfakename/E2-F5-TTS/E2': 'mrfakename/E2-F5-TTS', # seems to require multiple requests for setup
|
45 |
|
|
|
47 |
# Parler Large model
|
48 |
# 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
49 |
# Parler Mini model
|
50 |
+
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
51 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
52 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
53 |
|
54 |
# # Microsoft Edge TTS
|
55 |
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
56 |
+
# 'innoai/Edge-TTS-Text-to-Speech': '/Edge-TTS', # using Edge API
|
57 |
|
58 |
# IMS-Toucan
|
59 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
|
|
66 |
# StyleTTS Kokoro v0.23
|
67 |
# 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
|
68 |
# StyleTTS Kokoro v1.0
|
69 |
+
# 'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
|
70 |
|
71 |
# MaskGCT (by Amphion)
|
72 |
# 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
|
|
|
92 |
# 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
|
93 |
|
94 |
# Mars6
|
95 |
+
# 'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
96 |
|
97 |
# Zonos
|
98 |
# 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
|
99 |
+
# 'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
100 |
|
101 |
# Spark
|
102 |
+
# 'thunnai/SparkTTS': 'thunnai/SparkTTS',
|
103 |
|
104 |
# Sesame
|
105 |
'sesame/csm-1b' : 'sesame/csm-1b',
|
|
|
253 |
|
254 |
# Microsoft Edge TTS
|
255 |
'innoai/Edge-TTS-Text-to-Speech': {
|
256 |
+
'name': 'Microsoft® Edge TTS',
|
257 |
'function': '/predict',
|
258 |
'text_param_index': 0,
|
259 |
'return_audio_index': 0,
|
|
|
891 |
top_five = ['IndexTeam/IndexTTS']
|
892 |
|
893 |
# prioritize low vote models
|
894 |
+
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|
895 |
conn = get_db()
|
896 |
cursor = conn.cursor()
|
897 |
cursor.execute(sql)
|
|
|
899 |
for model in data:
|
900 |
if (
|
901 |
len(top_five) >= 5
|
|
|
|
|
902 |
):
|
903 |
break
|
904 |
+
if (
|
905 |
+
model[0] in top_five
|
906 |
+
or model[0] not in AVAILABLE_MODELS.keys()
|
907 |
+
):
|
908 |
+
continue
|
909 |
|
910 |
top_five.append(model[0])
|
911 |
print(f"low vote top_five: {top_five}")
|
app/synth.py
CHANGED
@@ -147,12 +147,12 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
147 |
|
148 |
# return path to audio
|
149 |
result = results
|
150 |
-
if (not isinstance(results, str)):
|
151 |
-
# return_audio_index may be a filepath string
|
152 |
-
result = results[return_audio_index]
|
153 |
if (isinstance(result, dict)):
|
154 |
# return_audio_index is a dictionary
|
155 |
-
result = results[
|
|
|
|
|
|
|
156 |
else:
|
157 |
# Use the private HF Space
|
158 |
result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
|
|
147 |
|
148 |
# return path to audio
|
149 |
result = results
|
|
|
|
|
|
|
150 |
if (isinstance(result, dict)):
|
151 |
# return_audio_index is a dictionary
|
152 |
+
result = results['value']
|
153 |
+
elif (not isinstance(result, str)):
|
154 |
+
# return_audio_index may be a filepath string
|
155 |
+
result = results[return_audio_index]
|
156 |
else:
|
157 |
# Use the private HF Space
|
158 |
result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|