Flux9665 committed on
Commit
9657f19
·
1 Parent(s): 4e28420

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -16
app.py CHANGED
@@ -33,6 +33,7 @@ class TTS_Interface:
33
  self.model = Meta_FastSpeech2(device=self.device)
34
  self.current_speaker = "English Speaker's Voice"
35
  self.current_language = "English"
 
36
  self.language_id_lookup = {
37
  "English" : "en",
38
  "German" : "de",
@@ -62,7 +63,9 @@ class TTS_Interface:
62
  "Italian Speaker's Voice" : "reference_audios/italian.flac",
63
  }
64
 
65
- def read(self, prompt, language, speaker):
 
 
66
  if len(prompt) > 2000:
67
  if language == "English":
68
  prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
@@ -90,8 +93,11 @@ class TTS_Interface:
90
  prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
91
 
92
  if self.current_language != language:
93
- self.model.set_language(self.language_id_lookup[language])
94
  self.current_language = language
 
 
 
95
  if self.current_speaker != speaker:
96
  self.model.set_utterance_embedding(self.speaker_path_lookup[speaker])
97
  self.current_speaker = speaker
@@ -105,19 +111,31 @@ article = "<p style='text-align: left'>This is still a work in progress, models
105
  iface = gr.Interface(fn=meta_model.read,
106
  inputs=[gr.inputs.Textbox(lines=2,
107
  placeholder="write what you want the synthesis to read here... (2000 character maximum)",
108
- label=" "),
109
- gr.inputs.Dropdown(['English',
110
- 'German',
111
- 'Greek',
112
- 'Spanish',
113
- 'Finnish',
114
- 'Russian',
115
- 'Hungarian',
116
- 'Dutch',
117
- 'French',
118
- 'Polish',
119
- 'Portuguese',
120
- 'Italian'], type="value", default='English', label="Language Selection"),
 
 
 
 
 
 
 
 
 
 
 
 
121
  gr.inputs.Dropdown(["English Speaker's Voice",
122
  "German Speaker's Voice",
123
  "Greek Speaker's Voice",
@@ -129,7 +147,7 @@ iface = gr.Interface(fn=meta_model.read,
129
  "French Speaker's Voice",
130
  "Polish Speaker's Voice",
131
  "Portuguese Speaker's Voice",
132
- "Italian Speaker's Voice"], type="value", default="English Speaker's Voice", label="Speaker Selection")],
133
  outputs=gr.outputs.Audio(type="numpy", label=None),
134
  layout="vertical",
135
  title="IMS Toucan Multilingual Multispeaker Demo",
 
33
  self.model = Meta_FastSpeech2(device=self.device)
34
  self.current_speaker = "English Speaker's Voice"
35
  self.current_language = "English"
36
+ self.current_accent = "English"
37
  self.language_id_lookup = {
38
  "English" : "en",
39
  "German" : "de",
 
63
  "Italian Speaker's Voice" : "reference_audios/italian.flac",
64
  }
65
 
66
+ def read(self, prompt, language, accent, speaker):
67
+ language = language.rstrip(" Text")
68
+ accent = accent.rstrip(" Accent")
69
  if len(prompt) > 2000:
70
  if language == "English":
71
  prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
 
93
  prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
94
 
95
  if self.current_language != language:
96
+ self.model.set_phonemizer_language(self.language_id_lookup[language])
97
  self.current_language = language
98
+ if self.current_accent != accent:
99
+ self.model.set_accent_language(self.language_id_lookup[language])
100
+ self.current_accent = accent
101
  if self.current_speaker != speaker:
102
  self.model.set_utterance_embedding(self.speaker_path_lookup[speaker])
103
  self.current_speaker = speaker
 
111
  iface = gr.Interface(fn=meta_model.read,
112
  inputs=[gr.inputs.Textbox(lines=2,
113
  placeholder="write what you want the synthesis to read here... (2000 character maximum)",
114
+ label="Text input"),
115
+ gr.inputs.Dropdown(['English Text',
116
+ 'German Text',
117
+ 'Greek Text',
118
+ 'Spanish Text',
119
+ 'Finnish Text',
120
+ 'Russian Text',
121
+ 'Hungarian Text',
122
+ 'Dutch Text',
123
+ 'French Text',
124
+ 'Polish Text',
125
+ 'Portuguese Text',
126
+ 'Italian Text'], type="value", default='English Text', label="Select the Language of the Text"),
127
+ gr.inputs.Dropdown(['English Accent',
128
+ 'German Accent',
129
+ 'Greek Accent',
130
+ 'Spanish Accent',
131
+ 'Finnish Accent',
132
+ 'Russian Accent',
133
+ 'Hungarian Accent',
134
+ 'Dutch Accent',
135
+ 'French Accent',
136
+ 'Polish Accent',
137
+ 'Portuguese Accent',
138
+ 'Italian Accent'], type="value", default='English Accent', label="Select the Accent of the Speaker"),
139
  gr.inputs.Dropdown(["English Speaker's Voice",
140
  "German Speaker's Voice",
141
  "Greek Speaker's Voice",
 
147
  "French Speaker's Voice",
148
  "Polish Speaker's Voice",
149
  "Portuguese Speaker's Voice",
150
+ "Italian Speaker's Voice"], type="value", default="English Speaker's Voice", label="Select the Voice of the Speaker")],
151
  outputs=gr.outputs.Audio(type="numpy", label=None),
152
  layout="vertical",
153
  title="IMS Toucan Multilingual Multispeaker Demo",