Pendrokar committed on
Commit
2b8f593
·
1 Parent(s): b44bd57

New TTS: Spark

Browse files
Files changed (1) hide show
  1. app/models.py +38 -18
app/models.py CHANGED
@@ -31,11 +31,11 @@ AVAILABLE_MODELS = {
31
  #'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
32
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
33
  # 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
34
- 'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
35
  # 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
36
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
37
- 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
38
- # 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
39
 
40
  # E2 & F5 TTS
41
  # F5 model
@@ -45,9 +45,9 @@ AVAILABLE_MODELS = {
45
 
46
  # # Parler
47
  # Parler Large model
48
- 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
49
  # Parler Mini model
50
- # 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
51
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
52
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
53
 
@@ -95,9 +95,12 @@ AVAILABLE_MODELS = {
95
  'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
96
 
97
  # Zonos
98
- 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
99
  'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
100
 
 
 
 
101
  # HF TTS w issues
102
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
103
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -255,7 +258,7 @@ HF_SPACES = {
255
  'fishaudio/fish-speech-1': {
256
  'name': 'Fish Speech',
257
  'function': '/inference_wrapper',
258
- 'text_param_index': 0,
259
  'return_audio_index': 0,
260
  'series': 'Fish Speech',
261
  'emoji': '😷', # broken space
@@ -468,6 +471,16 @@ HF_SPACES = {
468
  'is_zero_gpu_space': True,
469
  'series': 'Zonos',
470
  },
 
 
 
 
 
 
 
 
 
 
471
  }
472
 
473
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
@@ -568,16 +581,16 @@ OVERRIDE_INPUTS = {
568
  },
569
 
570
  'fishaudio/fish-speech-1': {
571
- 1: False, # normalize
572
- 2: handle_file('https://huggingface.co/spaces/fishaudio/fish-speech-1/resolve/main/examples/English.wav'), # reference_audio
573
- 3: 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
574
- 4: 0, # max_new_tokens
575
- 5: 200, # chunk_length
576
- 6: 0.7, # top_p
577
- 7: 1.2, # repetition_penalty
578
- 8: 0.7, # temperature
579
- 9: 0, #seed
580
- 10: "never", #use_memory_cache
581
  },
582
 
583
  # F5
@@ -746,6 +759,13 @@ OVERRIDE_INPUTS = {
746
  # 'Steveeeeeeen/Zonos/hybrid': {
747
  # 'model_choice': 'Zyphra/Zonos-v0.1-hybrid',
748
  # },
 
 
 
 
 
 
 
749
  }
750
 
751
  # minor mods to model from the same space
@@ -810,7 +830,7 @@ closed_source = [
810
  ]
811
 
812
  # top five models in order to always have one of them picked and scrutinized
813
- top_five = ['HKUST-Audio/Llasa-1B-finetuned-for-two-speakers']
814
 
815
  # prioritize low vote models
816
  sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
 
31
  #'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
32
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
33
  # 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
34
+ # 'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
35
  # 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
36
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
37
+ # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
38
+ 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
39
 
40
  # E2 & F5 TTS
41
  # F5 model
 
45
 
46
  # # Parler
47
  # Parler Large model
48
+ # 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
49
  # Parler Mini model
50
+ 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
51
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
52
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
53
 
 
95
  'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
96
 
97
  # Zonos
98
+ # 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
99
  'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
100
 
101
+ # Spark
102
+ 'thunnai/SparkTTS': 'thunnai/SparkTTS',
103
+
104
  # HF TTS w issues
105
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
106
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
 
258
  'fishaudio/fish-speech-1': {
259
  'name': 'Fish Speech',
260
  'function': '/inference_wrapper',
261
+ 'text_param_index': 'text',
262
  'return_audio_index': 0,
263
  'series': 'Fish Speech',
264
  'emoji': '😷', # broken space
 
471
  'is_zero_gpu_space': True,
472
  'series': 'Zonos',
473
  },
474
+
475
+ # Spark-TTS
476
+ 'thunnai/SparkTTS': {
477
+ 'name': 'Spark-TTS',
478
+ 'function': '/voice_clone',
479
+ 'text_param_index': 'text',
480
+ 'return_audio_index': 0,
481
+ 'is_zero_gpu_space': True,
482
+ 'series': 'Spark-TTS',
483
+ },
484
  }
485
 
486
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
 
581
  },
582
 
583
  'fishaudio/fish-speech-1': {
584
+ 'normalize': False,
585
+ 'reference_audio': handle_file('https://huggingface.co/spaces/fishaudio/fish-speech-1/resolve/main/examples/English.wav'),
586
+ 'reference_text': 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
587
+ 'max_new_tokens': 1024,
588
+ 'chunk_length': 200,
589
+ 'top_p': 0.7,
590
+ 'repetition_penalty': 1.2,
591
+ 'temperature': 0.7,
592
+ 'seed': 0,
593
+ 'use_memory_cache': "never",
594
  },
595
 
596
  # F5
 
759
  # 'Steveeeeeeen/Zonos/hybrid': {
760
  # 'model_choice': 'Zyphra/Zonos-v0.1-hybrid',
761
  # },
762
+
763
+ # Spark-TTS
764
+ 'thunnai/SparkTTS' : {
765
+ 'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
766
+ 'prompt_wav_upload': DEFAULT_VOICE_SAMPLE,
767
+ 'prompt_wav_record': None,
768
+ }
769
  }
770
 
771
  # minor mods to model from the same space
 
830
  ]
831
 
832
  # top five models in order to always have one of them picked and scrutinized
833
+ top_five = ['Spark-TTS']
834
 
835
  # prioritize low vote models
836
  sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'