Pendrokar committed on
Commit
e37c291
·
1 Parent(s): 10bbab8

xVA without DeepMoji; MARS 5 space inference very slow

Browse files
Files changed (1) hide show
  1. app/models.py +42 -3
app/models.py CHANGED
@@ -29,6 +29,7 @@ AVAILABLE_MODELS = {
29
  # 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
30
  # 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
31
  'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
 
32
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
33
  'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
34
  'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
@@ -79,10 +80,10 @@ AVAILABLE_MODELS = {
79
  'srinivasbilla/llasa-3b-tts': 'srinivasbilla/llasa-3b-tts',
80
 
81
  # Mars5
82
- # 'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
83
 
84
  # Mars6
85
- # 'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
86
 
87
  # HF TTS w issues
88
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
@@ -143,6 +144,13 @@ HF_SPACES = {
143
 
144
  # xVASynth (CPU)
145
  'Pendrokar/xVASynth-TTS': {
 
 
 
 
 
 
 
146
  'name': 'xVASynth v3',
147
  'function': '/predict',
148
  'text_param_index': 0,
@@ -370,6 +378,16 @@ HF_SPACES = {
370
  # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
371
  },
372
 
 
 
 
 
 
 
 
 
 
 
373
  # Mars6
374
  'CAMB-AI/mars6-turbo-demo': {
375
  'name': 'MARS 6',
@@ -378,7 +396,7 @@ HF_SPACES = {
378
  'return_audio_index': 0,
379
  'is_zero_gpu_space': False,
380
  'is_closed_source': True,
381
- 'series': 'llasa 3b',
382
  },
383
  }
384
 
@@ -388,6 +406,7 @@ DEFAULT_VOICE_SAMPLE = handle_file(DEFAULT_VOICE_SAMPLE_STR)
388
  DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
389
  DEFAULT_VOICE_PROMPT = "female voice; very clear audio"
390
 
 
391
  OVERRIDE_INPUTS = {
392
  'coqui/xtts': {
393
  1: 'en',
@@ -419,6 +438,11 @@ OVERRIDE_INPUTS = {
419
  1: 'x_ex04', #fine-tuned voice model name
420
  3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
421
  },
 
 
 
 
 
422
  'suno/bark': {
423
  1: 'Speaker 3 (en)', # voice
424
  },
@@ -587,6 +611,21 @@ OVERRIDE_INPUTS = {
587
  'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3')
588
  },
589
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  # MARS 6
591
  'CAMB-AI/mars6-turbo-demo': {
592
  'reference_audio': DEFAULT_VOICE_SAMPLE,
 
29
  # 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
30
  # 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
31
  'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
32
+ 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
33
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
34
  'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
35
  'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
 
80
  'srinivasbilla/llasa-3b-tts': 'srinivasbilla/llasa-3b-tts',
81
 
82
  # Mars5
83
+ # 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
84
 
85
  # Mars6
86
+ 'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
87
 
88
  # HF TTS w issues
89
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
 
144
 
145
  # xVASynth (CPU)
146
  'Pendrokar/xVASynth-TTS': {
147
+ 'name': 'xVASynth v3 DeepMoji',
148
+ 'function': '/predict',
149
+ 'text_param_index': 0,
150
+ 'return_audio_index': 0,
151
+ 'series': 'xVASynth',
152
+ },
153
+ 'Pendrokar/xVASynth-TTS/NoDeepMoji': {
154
  'name': 'xVASynth v3',
155
  'function': '/predict',
156
  'text_param_index': 0,
 
378
  # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
379
  },
380
 
381
+ # Mars5
382
+ 'CAMB-AI/mars5_space': {
383
+ 'name': 'MARS 5',
384
+ 'function': '/on_click',
385
+ 'text_param_index': 'text',
386
+ 'return_audio_index': 0,
387
+ 'is_zero_gpu_space': False,
388
+ 'series': 'MARS',
389
+ },
390
+
391
  # Mars6
392
  'CAMB-AI/mars6-turbo-demo': {
393
  'name': 'MARS 6',
 
396
  'return_audio_index': 0,
397
  'is_zero_gpu_space': False,
398
  'is_closed_source': True,
399
+ 'series': 'MARS',
400
  },
401
  }
402
 
 
406
  DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
407
  DEFAULT_VOICE_PROMPT = "female voice; very clear audio"
408
 
409
+ # Older Gradio spaces use unnamed (positional) parameters; both integer-index and named keys are valid
410
  OVERRIDE_INPUTS = {
411
  'coqui/xtts': {
412
  1: 'en',
 
438
  1: 'x_ex04', #fine-tuned voice model name
439
  3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
440
  },
441
+ 'Pendrokar/xVASynth-TTS/NoDeepMoji': {
442
+ 1: 'x_ex02', #fine-tuned voice model name
443
+ 3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
444
+ 10: False, #Use DeepMoji
445
+ },
446
  'suno/bark': {
447
  1: 'Speaker 3 (en)', # voice
448
  },
 
611
  'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3')
612
  },
613
 
614
+ # MARS 5
615
+ 'CAMB-AI/mars5_space': {
616
+ 'audio_file': DEFAULT_VOICE_SAMPLE,
617
+ 'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
618
+ 'temperature': 0.8,
619
+ 'top_k': -1,
620
+ 'top_p': 0.2,
621
+ 'typical_p': 1,
622
+ 'freq_penalty': 2.6,
623
+ 'presence_penalty': 0.4,
624
+ 'rep_penalty_window': 100,
625
+ 'nar_guidance_w': 3,
626
+ 'deep_clone': True, # too slow for deep clone
627
+ },
628
+
629
  # MARS 6
630
  'CAMB-AI/mars6-turbo-demo': {
631
  'reference_audio': DEFAULT_VOICE_SAMPLE,