Fabrice-TIERCELIN committed on
Commit
3001020
·
verified ·
1 Parent(s): 29a24a3

Recode the interface into block

Browse files
Files changed (1) hide show
  1. app.py +35 -32
app.py CHANGED
@@ -82,9 +82,10 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic):
82
  "output.wav",
83
  )
84
 
85
- title = "Multi-language Text-to-Speech"
86
-
87
- description = f"""
 
88
  <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
89
  <br/>
90
  XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
@@ -98,21 +99,15 @@ Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, wh
98
  <a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
99
  <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
100
  </p>
101
- """
102
-
103
- article = ""
104
- examples = [
105
- ]
106
-
107
- gr.Interface(
108
- fn=predict,
109
- inputs=[
110
- gr.Textbox(
111
  label="Text Prompt",
112
  info="One or two sentences at a time is better",
113
  value="Hello, World! Here is an example of light voice cloning. Try to upload your best audio samples quality",
114
- ),
115
- gr.Dropdown(
116
  label="Language",
117
  info="Select an output language for the synthesised speech",
118
  choices=[
@@ -132,27 +127,35 @@ gr.Interface(
132
  ],
133
  max_choices=1,
134
  value="en",
135
- ),
136
- gr.Audio(
137
  label="Reference Audio",
138
  #info="Click on the ✎ button to upload your own target speaker audio",
139
  type="filepath",
140
  value="examples/female.wav",
141
- ),
142
- gr.Audio(sources=["microphone"],
143
  type="filepath",
144
  #info="Use your microphone to record audio",
145
- label="Use Microphone for Reference"),
146
- gr.Checkbox(label="Check to use Microphone as Reference",
147
  value=False,
148
- info="Notice: Microphone input may not work properly under traffic",),
149
- ],
150
- outputs=[
151
- gr.Video(label="Waveform Visual", autoplay=True),
152
- gr.Audio(label="Synthesised Audio", autoplay=False),
153
- ],
154
- title=title,
155
- description=description,
156
- article=article,
157
- examples=examples,
158
- ).queue().launch(debug=True)
 
 
 
 
 
 
 
 
 
82
  "output.wav",
83
  )
84
 
85
+ with gr.Blocks() as interface:
86
+ gr.HTML("Multi-language Text-to-Speech")
87
+ gr.HTML(
88
+ """
89
  <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
90
  <br/>
91
  XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
 
99
  <a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
100
  <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
101
  </p>
102
+ """
103
+ )
104
+ with gr.Column():
105
+ prompt = gr.Textbox(
 
 
 
 
 
 
106
  label="Text Prompt",
107
  info="One or two sentences at a time is better",
108
  value="Hello, World! Here is an example of light voice cloning. Try to upload your best audio samples quality",
109
+ )
110
+ language = gr.Dropdown(
111
  label="Language",
112
  info="Select an output language for the synthesised speech",
113
  choices=[
 
127
  ],
128
  max_choices=1,
129
  value="en",
130
+ )
131
+ audio_file_pth = gr.Audio(
132
  label="Reference Audio",
133
  #info="Click on the ✎ button to upload your own target speaker audio",
134
  type="filepath",
135
  value="examples/female.wav",
136
+ )
137
+ mic_file_path = gr.Audio(sources=["microphone"],
138
  type="filepath",
139
  #info="Use your microphone to record audio",
140
+ label="Use Microphone for Reference")
141
+ use_mic = gr.Checkbox(label="Check to use Microphone as Reference",
142
  value=False,
143
+ info="Notice: Microphone input may not work properly under traffic",)
144
+ with gr.Accordion("Advanced options", open = False):
145
+ debug_mode = gr.Checkbox(label = "Debug mode", value = False, info = "Show intermediate results")
146
+
147
+ submit = gr.Button("🚀 Speak", variant = "primary")
148
+
149
+ waveform_visual = gr.Video(label="Waveform Visual", autoplay=True)
150
+ synthesised_audio = gr.Audio(label="Synthesised Audio", autoplay=False)
151
+ information = gr.HTML()
152
+
153
+ submit.click(predict, inputs = [
154
+ prompt, language, audio_file_pth, mic_file_path, use_mic
155
+ ], outputs = [
156
+ waveform_visual,
157
+ synthesised_audio,
158
+ information
159
+ ], scroll_to_output = True)
160
+
161
+ interface.queue().launch(debug=True)