Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -33,6 +33,7 @@ class TTS_Interface:
|
|
33 |
self.model = Meta_FastSpeech2(device=self.device)
|
34 |
self.current_speaker = "English Speaker's Voice"
|
35 |
self.current_language = "English"
|
|
|
36 |
self.language_id_lookup = {
|
37 |
"English" : "en",
|
38 |
"German" : "de",
|
@@ -62,7 +63,9 @@ class TTS_Interface:
|
|
62 |
"Italian Speaker's Voice" : "reference_audios/italian.flac",
|
63 |
}
|
64 |
|
65 |
-
def read(self, prompt, language, speaker):
|
|
|
|
|
66 |
if len(prompt) > 2000:
|
67 |
if language == "English":
|
68 |
prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
|
@@ -90,8 +93,11 @@ class TTS_Interface:
|
|
90 |
prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
|
91 |
|
92 |
if self.current_language != language:
|
93 |
-
self.model.
|
94 |
self.current_language = language
|
|
|
|
|
|
|
95 |
if self.current_speaker != speaker:
|
96 |
self.model.set_utterance_embedding(self.speaker_path_lookup[speaker])
|
97 |
self.current_speaker = speaker
|
@@ -105,19 +111,31 @@ article = "<p style='text-align: left'>This is still a work in progress, models
|
|
105 |
iface = gr.Interface(fn=meta_model.read,
|
106 |
inputs=[gr.inputs.Textbox(lines=2,
|
107 |
placeholder="write what you want the synthesis to read here... (2000 character maximum)",
|
108 |
-
label=" "),
|
109 |
-
gr.inputs.Dropdown(['English',
|
110 |
-
'German',
|
111 |
-
'Greek',
|
112 |
-
'Spanish',
|
113 |
-
'Finnish',
|
114 |
-
'Russian',
|
115 |
-
'Hungarian',
|
116 |
-
'Dutch',
|
117 |
-
'French',
|
118 |
-
'Polish',
|
119 |
-
'Portuguese',
|
120 |
-
'Italian'], type="value", default='English', label="Language
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
gr.inputs.Dropdown(["English Speaker's Voice",
|
122 |
"German Speaker's Voice",
|
123 |
"Greek Speaker's Voice",
|
@@ -129,7 +147,7 @@ iface = gr.Interface(fn=meta_model.read,
|
|
129 |
"French Speaker's Voice",
|
130 |
"Polish Speaker's Voice",
|
131 |
"Portuguese Speaker's Voice",
|
132 |
-
"Italian Speaker's Voice"], type="value", default="English Speaker's Voice", label="Speaker
|
133 |
outputs=gr.outputs.Audio(type="numpy", label=None),
|
134 |
layout="vertical",
|
135 |
title="IMS Toucan Multilingual Multispeaker Demo",
|
|
|
33 |
self.model = Meta_FastSpeech2(device=self.device)
|
34 |
self.current_speaker = "English Speaker's Voice"
|
35 |
self.current_language = "English"
|
36 |
+
self.current_accent = "English"
|
37 |
self.language_id_lookup = {
|
38 |
"English" : "en",
|
39 |
"German" : "de",
|
|
|
63 |
"Italian Speaker's Voice" : "reference_audios/italian.flac",
|
64 |
}
|
65 |
|
66 |
+
def read(self, prompt, language, accent, speaker):
|
67 |
+
language = language[:-len(" Text")] if language.endswith(" Text") else language
|
68 |
+
accent = accent[:-len(" Accent")] if accent.endswith(" Accent") else accent
|
69 |
if len(prompt) > 2000:
|
70 |
if language == "English":
|
71 |
prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
|
|
|
93 |
prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
|
94 |
|
95 |
if self.current_language != language:
|
96 |
+
self.model.set_phonemizer_language(self.language_id_lookup[language])
|
97 |
self.current_language = language
|
98 |
+
if self.current_accent != accent:
|
99 |
+
self.model.set_accent_language(self.language_id_lookup[accent])
|
100 |
+
self.current_accent = accent
|
101 |
if self.current_speaker != speaker:
|
102 |
self.model.set_utterance_embedding(self.speaker_path_lookup[speaker])
|
103 |
self.current_speaker = speaker
|
|
|
111 |
iface = gr.Interface(fn=meta_model.read,
|
112 |
inputs=[gr.inputs.Textbox(lines=2,
|
113 |
placeholder="write what you want the synthesis to read here... (2000 character maximum)",
|
114 |
+
label="Text input"),
|
115 |
+
gr.inputs.Dropdown(['English Text',
|
116 |
+
'German Text',
|
117 |
+
'Greek Text',
|
118 |
+
'Spanish Text',
|
119 |
+
'Finnish Text',
|
120 |
+
'Russian Text',
|
121 |
+
'Hungarian Text',
|
122 |
+
'Dutch Text',
|
123 |
+
'French Text',
|
124 |
+
'Polish Text',
|
125 |
+
'Portuguese Text',
|
126 |
+
'Italian Text'], type="value", default='English Text', label="Select the Language of the Text"),
|
127 |
+
gr.inputs.Dropdown(['English Accent',
|
128 |
+
'German Accent',
|
129 |
+
'Greek Accent',
|
130 |
+
'Spanish Accent',
|
131 |
+
'Finnish Accent',
|
132 |
+
'Russian Accent',
|
133 |
+
'Hungarian Accent',
|
134 |
+
'Dutch Accent',
|
135 |
+
'French Accent',
|
136 |
+
'Polish Accent',
|
137 |
+
'Portuguese Accent',
|
138 |
+
'Italian Accent'], type="value", default='English Accent', label="Select the Accent of the Speaker"),
|
139 |
gr.inputs.Dropdown(["English Speaker's Voice",
|
140 |
"German Speaker's Voice",
|
141 |
"Greek Speaker's Voice",
|
|
|
147 |
"French Speaker's Voice",
|
148 |
"Polish Speaker's Voice",
|
149 |
"Portuguese Speaker's Voice",
|
150 |
+
"Italian Speaker's Voice"], type="value", default="English Speaker's Voice", label="Select the Voice of the Speaker")],
|
151 |
outputs=gr.outputs.Audio(type="numpy", label=None),
|
152 |
layout="vertical",
|
153 |
title="IMS Toucan Multilingual Multispeaker Demo",
|