Steveeeeeeen HF staff committed on
Commit
22b9e3b
·
verified ·
1 Parent(s): 497563a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -5
app.py CHANGED
@@ -36,13 +36,26 @@ whisper_turbo_pipe = pipeline(
36
  )
37
 
38
  SPEAKERS = {
39
- "Male 1": {
40
  "path": "speakers/female_1.mp3",
41
  "transcript": "e lo stesso alessi che andò ad aprire non riconobbe antoni il quale tornava con la sporta sotto il braccio tanto era mutato coperto di polvere e con la barba lungacome fu entrato e si fu messo a sedere in un cantuccio non osavano quasi fargli festa.",
42
  "description": "Una voce femminile.",
43
  },
 
 
 
 
 
 
 
 
 
 
44
  }
45
 
 
 
 
46
  def preview_speaker(display_name):
47
  """Returns the audio and transcript for preview"""
48
  speaker_name = speaker_display_dict[display_name]
@@ -158,13 +171,30 @@ def infer(sample_audio_path, target_text, progress=gr.Progress()):
158
 
159
  with gr.Blocks() as app_tts:
160
  gr.Markdown("# Zero Shot Voice Clone TTS")
161
- ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
 
 
 
 
 
 
 
 
162
  gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
163
-
164
  generate_btn = gr.Button("Synthesize", variant="primary")
165
-
166
  audio_output = gr.Audio(label="Synthesized Audio")
167
 
 
 
 
 
 
 
 
 
 
 
 
168
  generate_btn.click(
169
  infer,
170
  inputs=[
@@ -183,7 +213,7 @@ with gr.Blocks() as app_credits:
183
  """)
184
 
185
  with gr.Blocks() as app:
186
- gr.HTML("<img src='https://huggingface.co/datasets/Steveeeeeeen/random_images/blob/main/llasagna.png' alt='Llasagna' style='width: 100%; height: auto;'>", elem_id="banner")
187
  gr.Markdown(
188
  """
189
  # Llasagna 1b TTS
 
36
  )
37
 
38
  SPEAKERS = {
39
+ "Female 1": {
40
  "path": "speakers/female_1.mp3",
41
  "transcript": "e lo stesso alessi che andò ad aprire non riconobbe antoni il quale tornava con la sporta sotto il braccio tanto era mutato coperto di polvere e con la barba lungacome fu entrato e si fu messo a sedere in un cantuccio non osavano quasi fargli festa.",
42
  "description": "Una voce femminile.",
43
  },
44
+ "Male 1": {
45
+ "path": "speakers/male_1.mp3",
46
+ "transcript": "Hello, this is a sample voice recording for demonstration purposes.",
47
+ "description": "A male voice with neutral accent.",
48
+ },
49
+ "Female 2": {
50
+ "path": "speakers/female_2.mp3",
51
+ "transcript": "This is another sample recording to showcase the voice cloning capabilities.",
52
+ "description": "A female voice with clear articulation.",
53
+ },
54
  }
55
 
56
+ banner_url = "https://huggingface.co/datasets/Steveeeeeeen/random_images/resolve/main/llasagna.png"
57
+ BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 150px; max-width: 300px;"> </div>'
58
+
59
  def preview_speaker(display_name):
60
  """Returns the audio and transcript for preview"""
61
  speaker_name = speaker_display_dict[display_name]
 
171
 
172
  with gr.Blocks() as app_tts:
173
  gr.Markdown("# Zero Shot Voice Clone TTS")
174
+
175
+ with gr.Row():
176
+ ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
177
+ speaker_dropdown = gr.Dropdown(
178
+ choices=list(SPEAKERS.keys()),
179
+ label="Or select a predefined speaker",
180
+ value=None
181
+ )
182
+
183
  gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
 
184
  generate_btn = gr.Button("Synthesize", variant="primary")
 
185
  audio_output = gr.Audio(label="Synthesized Audio")
186
 
187
+ def update_audio(speaker):
188
+ if speaker in SPEAKERS:
189
+ return SPEAKERS[speaker]["path"]
190
+ return None
191
+
192
+ speaker_dropdown.change(
193
+ fn=update_audio,
194
+ inputs=[speaker_dropdown],
195
+ outputs=[ref_audio_input]
196
+ )
197
+
198
  generate_btn.click(
199
  infer,
200
  inputs=[
 
213
  """)
214
 
215
  with gr.Blocks() as app:
216
+ gr.HTML(BANNER, elem_id="banner")
217
  gr.Markdown(
218
  """
219
  # Llasagna 1b TTS