inOXcrm committed on
Commit
616dbaa
·
1 Parent(s): 8b2b538

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  # Load spectrogram generator
3
  from nemo.collections.tts.models import FastPitchModel
4
  spec_generator = FastPitchModel.from_pretrained(model_name="inOXcrm/German_multispeaker_FastPitch_nemo")
@@ -13,16 +14,18 @@ model = HifiGanModel.from_pretrained(model_name="tts_de_hui_hifigan_ft_fastpitch
13
  def generate_audio(speaker_id, input_txt):
14
  sr=44100
15
  parsed = spec_generator.parse(input_txt)
16
- spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker_id)
17
  audio = model.convert_spectrogram_to_audio(spec=spectrogram)
 
 
18
  return (sr, audio)
19
 
20
 
21
  gr.Interface(
22
  generate_audio,
23
  [
24
- gr.Textbox(type="text", value=1, label="Speaker ID (0-5)"),
25
- gr.Textbox(type="text", value=1, label="Input Text")
26
  ],
27
  "audio",
28
  ).launch()
 
1
  import gradio as gr
2
+ import numpy as np
3
  # Load spectrogram generator
4
  from nemo.collections.tts.models import FastPitchModel
5
  spec_generator = FastPitchModel.from_pretrained(model_name="inOXcrm/German_multispeaker_FastPitch_nemo")
 
14
  def generate_audio(speaker_id, input_txt):
15
  sr=44100
16
  parsed = spec_generator.parse(input_txt)
17
+ spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=int(speaker_id))
18
  audio = model.convert_spectrogram_to_audio(spec=spectrogram)
19
+ audio = audio.to('cpu').detach().numpy()[0]
20
+ audio = audio / np.abs(audio).max()
21
  return (sr, audio)
22
 
23
 
24
  gr.Interface(
25
  generate_audio,
26
  [
27
+ gr.Textbox(type="text", value=1, label="Speaker ID (0-4)"),
28
+ gr.Textbox(type="text", value="Hallo", label="Input Text")
29
  ],
30
  "audio",
31
  ).launch()