Pendrokar committed on
Commit
e98c2b3
·
1 Parent(s): 32a7e32

New TTS: Zonos both archs; disabled xVASynth v3

Browse files
Files changed (2) hide show
  1. app/models.py +57 -2
  2. test_tts_zonos.py +52 -0
app/models.py CHANGED
@@ -31,7 +31,7 @@ AVAILABLE_MODELS = {
31
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
32
  # 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
33
  'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
34
- 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
35
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
36
  'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
37
  # 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
@@ -90,6 +90,10 @@ AVAILABLE_MODELS = {
90
  # Mars6
91
  'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
92
 
 
 
 
 
93
  # HF TTS w issues
94
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
95
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -417,6 +421,24 @@ HF_SPACES = {
417
  'is_closed_source': True,
418
  'series': 'MARS',
419
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  }
421
 
422
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
@@ -658,8 +680,41 @@ OVERRIDE_INPUTS = {
658
  'quality_prefix': "48000",
659
  'clone_method': "deep-clone",
660
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  }
662
 
 
 
 
663
 
664
  # Model name mapping, can include models that users cannot vote on
665
  model_names = {
@@ -719,7 +774,7 @@ closed_source = [
719
  ]
720
 
721
  # top five models in order to always have one of them picked and scrutinized
722
- top_five = []
723
 
724
  # prioritize low vote models
725
  sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
 
31
  #'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
32
  # 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
33
  'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
34
+ # 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
35
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
36
  'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
37
  # 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
 
90
  # Mars6
91
  'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
92
 
93
+ # Zonos
94
+ 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
95
+ 'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
96
+
97
  # HF TTS w issues
98
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
99
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
 
421
  'is_closed_source': True,
422
  'series': 'MARS',
423
  },
424
+
425
+ # Zonos
426
+ 'Steveeeeeeen/Zonos': {
427
+ 'name': 'Zonos T',
428
+ 'function': '/generate_audio',
429
+ 'text_param_index': 'text',
430
+ 'return_audio_index': 0,
431
+ 'is_zero_gpu_space': True,
432
+ 'series': 'Zonos',
433
+ },
434
+ 'Steveeeeeeen/Zonos/hybrid': {
435
+ 'name': 'Zonos H',
436
+ 'function': '/generate_audio',
437
+ 'text_param_index': 'text',
438
+ 'return_audio_index': 0,
439
+ 'is_zero_gpu_space': True,
440
+ 'series': 'Zonos',
441
+ },
442
  }
443
 
444
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
 
680
  'quality_prefix': "48000",
681
  'clone_method': "deep-clone",
682
  },
683
+
684
+ # Zonos
685
+ 'Steveeeeeeen/Zonos': {
686
+ 'model_choice':"Zyphra/Zonos-v0.1-transformer",
687
+ 'language': "en-us",
688
+ 'speaker_audio': None, # optional
689
+ 'prefix_audio': handle_file('https://huggingface.co/spaces/Steveeeeeeen/Zonos/resolve/main/assets/silence_100ms.wav'),
690
+ # 'e1': 1,
691
+ # 'e2': 0.05,
692
+ # 'e3': 0.05,
693
+ # 'e4': 0.05,
694
+ # 'e5': 0.05,
695
+ # 'e6': 0.05,
696
+ # 'e7': 0.1,
697
+ # 'e8': 0.2,
698
+ 'vq_single': 0.78,
699
+ 'fmax': 24000,
700
+ 'pitch_std': 45,
701
+ 'speaking_rate': 15,
702
+ 'dnsmos_ovrl': 4,
703
+ 'speaker_noised': False,
704
+ 'cfg_scale': 2,
705
+ 'min_p': 0.15,
706
+ 'seed': 420,
707
+ 'randomize_seed': False, # Set to False to easily recreate the state
708
+ 'unconditional_keys': ["emotion"], # makes it ignore e1-e8
709
+ },
710
+ # 'Steveeeeeeen/Zonos/hybrid': {
711
+ # 'model_choice': 'Zyphra/Zonos-v0.1-hybrid',
712
+ # },
713
  }
714
 
715
+ # minor mods to model from the same space; shallow-copy first, otherwise both keys share one dict and the base entry's model_choice is overwritten too
716
+ OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid'] = dict(OVERRIDE_INPUTS['Steveeeeeeen/Zonos'])
717
+ OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid']['model_choice'] = 'Zyphra/Zonos-v0.1-hybrid'
718
 
719
  # Model name mapping, can include models that users cannot vote on
720
  model_names = {
 
774
  ]
775
 
776
  # top five models in order to always have one of them picked and scrutinized
777
+ top_five = ['Steveeeeeeen/Zonos', 'Steveeeeeeen/Zonos/hybrid']
778
 
779
  # prioritize low vote models
780
  sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
test_tts_zonos.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from test_overrides import _get_param_examples, _override_params
3
+ from gradio_client import Client, file
4
+
5
+ model = "Steveeeeeeen/Zonos/hybrid"
6
+ # client = Client("Pendrokar/Zonos", hf_token=os.getenv('HF_TOKEN'))
7
+ client = Client("Steveeeeeeen/Zonos", hf_token=os.getenv('HF_TOKEN'))
8
+ # client = Client(model, hf_token=os.getenv('HF_TOKEN'))
9
+ endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
10
+ # print(endpoints)
11
+
12
+ api_name = '/generate_audio'
13
+ fn_index = None
14
+ # prompt written into the 'text' keyword input, or into slot 0 for positional inputs
15
+ text = 'This is what my voice sounds like.'
16
+
17
+ end_parameters = _get_param_examples(
18
+     endpoints['named_endpoints'][api_name]['parameters']
19
+ )
20
+ print(end_parameters)
21
+
22
+
23
+ # inputs actually sent to the Space; `model` selects which override set is applied
24
+ # override some or all default parameters
25
+ space_inputs = _override_params(end_parameters, model)
26
+
27
+ if isinstance(space_inputs, dict):
28
+     space_inputs['text'] = text
29
+     result = client.predict(
30
+         **space_inputs,
31
+         api_name=api_name,
32
+         fn_index=fn_index
33
+     )
34
+ else:
35
+     space_inputs[0] = text
36
+     result = client.predict(
37
+         *space_inputs,
38
+         api_name=api_name,
39
+         fn_index=fn_index
40
+     )
41
+     # space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
42
+
43
+ print(space_inputs)
44
+ # print(*space_inputs)
45
+ # print(**space_inputs)
46
+
47
+ # result = client.predict(
48
+ # **space_inputs,
49
+ # api_name=api_name,
50
+ # fn_index=fn_index
51
+ # )
52
+ print(result)