Spaces:
Running
Running
IndexTTS return audio index fix
Browse files
- app/models.py +16 -13
- app/synth.py +4 -4
app/models.py
CHANGED
|
@@ -26,7 +26,7 @@ AVAILABLE_MODELS = {
|
|
| 26 |
# '<keyname>':'<Space URL>'
|
| 27 |
# gradio version that works with most spaces: 4.29
|
| 28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
| 29 |
-
'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
| 30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
@@ -35,11 +35,11 @@ AVAILABLE_MODELS = {
|
|
| 35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 37 |
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
| 38 |
-
'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
| 39 |
|
| 40 |
# E2 & F5 TTS
|
| 41 |
# F5 model
|
| 42 |
-
'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
| 43 |
# E2 model
|
| 44 |
# 'mrfakename/E2-F5-TTS/E2': 'mrfakename/E2-F5-TTS', # seems to require multiple requests for setup
|
| 45 |
|
|
@@ -47,13 +47,13 @@ AVAILABLE_MODELS = {
|
|
| 47 |
# Parler Large model
|
| 48 |
# 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 49 |
# Parler Mini model
|
| 50 |
-
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 51 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
| 52 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
| 53 |
|
| 54 |
# # Microsoft Edge TTS
|
| 55 |
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
| 56 |
-
'innoai/Edge-TTS-Text-to-Speech': '/Edge-TTS', # using Edge API
|
| 57 |
|
| 58 |
# IMS-Toucan
|
| 59 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
|
@@ -66,7 +66,7 @@ AVAILABLE_MODELS = {
|
|
| 66 |
# StyleTTS Kokoro v0.23
|
| 67 |
# 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
|
| 68 |
# StyleTTS Kokoro v1.0
|
| 69 |
-
'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
|
| 70 |
|
| 71 |
# MaskGCT (by Amphion)
|
| 72 |
# 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
|
|
@@ -92,14 +92,14 @@ AVAILABLE_MODELS = {
|
|
| 92 |
# 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
|
| 93 |
|
| 94 |
# Mars6
|
| 95 |
-
'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
| 96 |
|
| 97 |
# Zonos
|
| 98 |
# 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
|
| 99 |
-
'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
| 100 |
|
| 101 |
# Spark
|
| 102 |
-
'thunnai/SparkTTS': 'thunnai/SparkTTS',
|
| 103 |
|
| 104 |
# Sesame
|
| 105 |
'sesame/csm-1b' : 'sesame/csm-1b',
|
|
@@ -253,7 +253,7 @@ HF_SPACES = {
|
|
| 253 |
|
| 254 |
# Microsoft Edge TTS
|
| 255 |
'innoai/Edge-TTS-Text-to-Speech': {
|
| 256 |
-
'name': 'Microsoft
|
| 257 |
'function': '/predict',
|
| 258 |
'text_param_index': 0,
|
| 259 |
'return_audio_index': 0,
|
|
@@ -891,7 +891,7 @@ closed_source = [
|
|
| 891 |
top_five = ['IndexTeam/IndexTTS']
|
| 892 |
|
| 893 |
# prioritize low vote models
|
| 894 |
-
sql = 'SELECT name FROM model WHERE (upvote + downvote) <
|
| 895 |
conn = get_db()
|
| 896 |
cursor = conn.cursor()
|
| 897 |
cursor.execute(sql)
|
|
@@ -899,10 +899,13 @@ data = cursor.fetchall()
|
|
| 899 |
for model in data:
|
| 900 |
if (
|
| 901 |
len(top_five) >= 5
|
| 902 |
-
or model[0] in top_five
|
| 903 |
-
or model[0] not in AVAILABLE_MODELS.keys()
|
| 904 |
):
|
| 905 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
|
| 907 |
top_five.append(model[0])
|
| 908 |
print(f"low vote top_five: {top_five}")
|
|
|
|
| 26 |
# '<keyname>':'<Space URL>'
|
| 27 |
# gradio version that works with most spaces: 4.29
|
| 28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
| 29 |
+
# 'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
| 30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
|
|
| 35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 37 |
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
| 38 |
+
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
| 39 |
|
| 40 |
# E2 & F5 TTS
|
| 41 |
# F5 model
|
| 42 |
+
# 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
| 43 |
# E2 model
|
| 44 |
# 'mrfakename/E2-F5-TTS/E2': 'mrfakename/E2-F5-TTS', # seems to require multiple requests for setup
|
| 45 |
|
|
|
|
| 47 |
# Parler Large model
|
| 48 |
# 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 49 |
# Parler Mini model
|
| 50 |
+
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 51 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
| 52 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
| 53 |
|
| 54 |
# # Microsoft Edge TTS
|
| 55 |
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
| 56 |
+
# 'innoai/Edge-TTS-Text-to-Speech': '/Edge-TTS', # using Edge API
|
| 57 |
|
| 58 |
# IMS-Toucan
|
| 59 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
|
|
|
| 66 |
# StyleTTS Kokoro v0.23
|
| 67 |
# 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
|
| 68 |
# StyleTTS Kokoro v1.0
|
| 69 |
+
# 'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
|
| 70 |
|
| 71 |
# MaskGCT (by Amphion)
|
| 72 |
# 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
|
|
|
|
| 92 |
# 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
|
| 93 |
|
| 94 |
# Mars6
|
| 95 |
+
# 'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
| 96 |
|
| 97 |
# Zonos
|
| 98 |
# 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
|
| 99 |
+
# 'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
| 100 |
|
| 101 |
# Spark
|
| 102 |
+
# 'thunnai/SparkTTS': 'thunnai/SparkTTS',
|
| 103 |
|
| 104 |
# Sesame
|
| 105 |
'sesame/csm-1b' : 'sesame/csm-1b',
|
|
|
|
| 253 |
|
| 254 |
# Microsoft Edge TTS
|
| 255 |
'innoai/Edge-TTS-Text-to-Speech': {
|
| 256 |
+
'name': 'Microsoft® Edge TTS',
|
| 257 |
'function': '/predict',
|
| 258 |
'text_param_index': 0,
|
| 259 |
'return_audio_index': 0,
|
|
|
|
| 891 |
top_five = ['IndexTeam/IndexTTS']
|
| 892 |
|
| 893 |
# prioritize low vote models
|
| 894 |
+
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|
| 895 |
conn = get_db()
|
| 896 |
cursor = conn.cursor()
|
| 897 |
cursor.execute(sql)
|
|
|
|
| 899 |
for model in data:
|
| 900 |
if (
|
| 901 |
len(top_five) >= 5
|
|
|
|
|
|
|
| 902 |
):
|
| 903 |
break
|
| 904 |
+
if (
|
| 905 |
+
model[0] in top_five
|
| 906 |
+
or model[0] not in AVAILABLE_MODELS.keys()
|
| 907 |
+
):
|
| 908 |
+
continue
|
| 909 |
|
| 910 |
top_five.append(model[0])
|
| 911 |
print(f"low vote top_five: {top_five}")
|
app/synth.py
CHANGED
|
@@ -147,12 +147,12 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
| 147 |
|
| 148 |
# return path to audio
|
| 149 |
result = results
|
| 150 |
-
if (not isinstance(results, str)):
|
| 151 |
-
# return_audio_index may be a filepath string
|
| 152 |
-
result = results[return_audio_index]
|
| 153 |
if (isinstance(result, dict)):
|
| 154 |
# return_audio_index is a dictionary
|
| 155 |
-
result = results[
|
|
|
|
|
|
|
|
|
|
| 156 |
else:
|
| 157 |
# Use the private HF Space
|
| 158 |
result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
|
|
|
| 147 |
|
| 148 |
# return path to audio
|
| 149 |
result = results
|
|
|
|
|
|
|
|
|
|
| 150 |
if (isinstance(result, dict)):
|
| 151 |
# return_audio_index is a dictionary
|
| 152 |
+
result = results['value']
|
| 153 |
+
elif (not isinstance(result, str)):
|
| 154 |
+
# return_audio_index may be a filepath string
|
| 155 |
+
result = results[return_audio_index]
|
| 156 |
else:
|
| 157 |
# Use the private HF Space
|
| 158 |
result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|