Spaces:
Running
on
Zero
Running
on
Zero
Add xVASynth without DeepMoji; MARS 5 Space inference is very slow
Browse files- app/models.py +42 -3
app/models.py
CHANGED
@@ -29,6 +29,7 @@ AVAILABLE_MODELS = {
|
|
29 |
# 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
|
30 |
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
31 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
|
|
32 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
33 |
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
34 |
'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
|
@@ -79,10 +80,10 @@ AVAILABLE_MODELS = {
|
|
79 |
'srinivasbilla/llasa-3b-tts': 'srinivasbilla/llasa-3b-tts',
|
80 |
|
81 |
# Mars5
|
82 |
-
# 'CAMB-AI/
|
83 |
|
84 |
# Mars6
|
85 |
-
|
86 |
|
87 |
# HF TTS w issues
|
88 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # unresponsive to exclamation marks # 4.29
|
@@ -143,6 +144,13 @@ HF_SPACES = {
|
|
143 |
|
144 |
# xVASynth (CPU)
|
145 |
'Pendrokar/xVASynth-TTS': {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
'name': 'xVASynth v3',
|
147 |
'function': '/predict',
|
148 |
'text_param_index': 0,
|
@@ -370,6 +378,16 @@ HF_SPACES = {
|
|
370 |
# 'emoji': '🥵', # requires 300s reserved ZeroGPU!
|
371 |
},
|
372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
# Mars6
|
374 |
'CAMB-AI/mars6-turbo-demo': {
|
375 |
'name': 'MARS 6',
|
@@ -378,7 +396,7 @@ HF_SPACES = {
|
|
378 |
'return_audio_index': 0,
|
379 |
'is_zero_gpu_space': False,
|
380 |
'is_closed_source': True,
|
381 |
-
'series': '
|
382 |
},
|
383 |
}
|
384 |
|
@@ -388,6 +406,7 @@ DEFAULT_VOICE_SAMPLE = handle_file(DEFAULT_VOICE_SAMPLE_STR)
|
|
388 |
DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
|
389 |
DEFAULT_VOICE_PROMPT = "female voice; very clear audio"
|
390 |
|
|
|
391 |
OVERRIDE_INPUTS = {
|
392 |
'coqui/xtts': {
|
393 |
1: 'en',
|
@@ -419,6 +438,11 @@ OVERRIDE_INPUTS = {
|
|
419 |
1: 'x_ex04', #fine-tuned voice model name
|
420 |
3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
|
421 |
},
|
|
|
|
|
|
|
|
|
|
|
422 |
'suno/bark': {
|
423 |
1: 'Speaker 3 (en)', # voice
|
424 |
},
|
@@ -587,6 +611,21 @@ OVERRIDE_INPUTS = {
|
|
587 |
'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3')
|
588 |
},
|
589 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
# MARS 6
|
591 |
'CAMB-AI/mars6-turbo-demo': {
|
592 |
'reference_audio': DEFAULT_VOICE_SAMPLE,
|
|
|
29 |
# 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
|
30 |
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
31 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
32 |
+
'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
33 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
34 |
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
35 |
'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
|
|
|
80 |
'srinivasbilla/llasa-3b-tts': 'srinivasbilla/llasa-3b-tts',
|
81 |
|
82 |
# Mars5
|
83 |
+
# 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
|
84 |
|
85 |
# Mars6
|
86 |
+
'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
87 |
|
88 |
# HF TTS w issues
|
89 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # unresponsive to exclamation marks # 4.29
|
|
|
144 |
|
145 |
# xVASynth (CPU)
|
146 |
'Pendrokar/xVASynth-TTS': {
|
147 |
+
'name': 'xVASynth v3 DeepMoji',
|
148 |
+
'function': '/predict',
|
149 |
+
'text_param_index': 0,
|
150 |
+
'return_audio_index': 0,
|
151 |
+
'series': 'xVASynth',
|
152 |
+
},
|
153 |
+
'Pendrokar/xVASynth-TTS/NoDeepMoji': {
|
154 |
'name': 'xVASynth v3',
|
155 |
'function': '/predict',
|
156 |
'text_param_index': 0,
|
|
|
378 |
# 'emoji': '🥵', # requires 300s reserved ZeroGPU!
|
379 |
},
|
380 |
|
381 |
+
# Mars5
|
382 |
+
'CAMB-AI/mars5_space': {
|
383 |
+
'name': 'MARS 5',
|
384 |
+
'function': '/on_click',
|
385 |
+
'text_param_index': 'text',
|
386 |
+
'return_audio_index': 0,
|
387 |
+
'is_zero_gpu_space': False,
|
388 |
+
'series': 'MARS',
|
389 |
+
},
|
390 |
+
|
391 |
# Mars6
|
392 |
'CAMB-AI/mars6-turbo-demo': {
|
393 |
'name': 'MARS 6',
|
|
|
396 |
'return_audio_index': 0,
|
397 |
'is_zero_gpu_space': False,
|
398 |
'is_closed_source': True,
|
399 |
+
'series': 'MARS',
|
400 |
},
|
401 |
}
|
402 |
|
|
|
406 |
DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
|
407 |
DEFAULT_VOICE_PROMPT = "female voice; very clear audio"
|
408 |
|
409 |
+
# Older Gradio spaces use unnamed (positional) parameters; both integer-index keys and named keys are valid here
|
410 |
OVERRIDE_INPUTS = {
|
411 |
'coqui/xtts': {
|
412 |
1: 'en',
|
|
|
438 |
1: 'x_ex04', #fine-tuned voice model name
|
439 |
3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
|
440 |
},
|
441 |
+
'Pendrokar/xVASynth-TTS/NoDeepMoji': {
|
442 |
+
1: 'x_ex02', #fine-tuned voice model name
|
443 |
+
3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
|
444 |
+
10: False, #Use DeepMoji
|
445 |
+
},
|
446 |
'suno/bark': {
|
447 |
1: 'Speaker 3 (en)', # voice
|
448 |
},
|
|
|
611 |
'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3')
|
612 |
},
|
613 |
|
614 |
+
# MARS 5
|
615 |
+
'CAMB-AI/mars5_space': {
|
616 |
+
'audio_file': DEFAULT_VOICE_SAMPLE,
|
617 |
+
'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
|
618 |
+
'temperature': 0.8,
|
619 |
+
'top_k': -1,
|
620 |
+
'top_p': 0.2,
|
621 |
+
'typical_p': 1,
|
622 |
+
'freq_penalty': 2.6,
|
623 |
+
'presence_penalty': 0.4,
|
624 |
+
'rep_penalty_window': 100,
|
625 |
+
'nar_guidance_w': 3,
|
626 |
+
'deep_clone': True, # NOTE(review): original comment says "too slow for deep clone", yet the flag is True — confirm the intended value
|
627 |
+
},
|
628 |
+
|
629 |
# MARS 6
|
630 |
'CAMB-AI/mars6-turbo-demo': {
|
631 |
'reference_audio': DEFAULT_VOICE_SAMPLE,
|