Abrahamau committed (verified)
Commit c70c2b2 · Parent(s): 1c46d88

Update app.py

Files changed (1)
app.py  +5 −3
app.py CHANGED
@@ -29,8 +29,10 @@ def text2speech(model, text):
     speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 
     speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
-    print(speech, type(speech) )
-    return speech["audio"], speech["sampling_rate"]
+    audio_data = np.frombuffer(speech["audio"], dtype=np.float32)
+    audio_data_16bit = (audio_data * 32767).astype(np.int16)
+
+    return sampling_rate, audio_data_16bit
 
 radio1 = gr.Radio(["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"], label="Select a Classifier", info="Image Classifier")
 tab1 = gr.Interface(
@@ -50,7 +52,7 @@ radio3 = gr.Radio(["microsoft/speecht5_tts"], label="Select an tts", info="Age C
 tab3 = gr.Interface(
     fn=text2speech,
     inputs=[radio3, "text"],
-    outputs=[gr.Audio(label="Generated Speech", type="numpy")],
+    outputs=["audio"],
 )
 
 demo = gr.TabbedInterface([tab1, tab2, tab3], ["tab1", "tab2", "tab3"])
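
For reference, a minimal sketch of what the revised text2speech path appears to do. It is not the full app.py: the pipeline construction, the imports, and the speaker-embedding dataset are outside this diff, so their loading below is an assumption based on the standard SpeechT5 example. The committed return statement references a bare sampling_rate name that is not defined in the hunk; the sketch assumes it is meant to be speech["sampling_rate"], and it uses np.asarray in place of np.frombuffer for the float32-to-int16 conversion.

import numpy as np
import torch
from datasets import load_dataset
from transformers import pipeline

def text2speech(model, text):
    # Build the TTS pipeline for the selected model; radio3 only offers
    # "microsoft/speecht5_tts". Assumed to happen elsewhere in app.py.
    synthesiser = pipeline("text-to-speech", model=model)

    # Speaker x-vector used by SpeechT5; index 7306 matches the commit.
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})

    # speech["audio"] is float32 PCM in [-1, 1]; scale it to 16-bit integers.
    audio_data = np.asarray(speech["audio"], dtype=np.float32).ravel()
    audio_data_16bit = (audio_data * 32767).astype(np.int16)

    # Gradio's audio output accepts a (sample_rate, samples) tuple; the commit's
    # bare `sampling_rate` is assumed to come from the pipeline result.
    return speech["sampling_rate"], audio_data_16bit

On the interface side, outputs=["audio"] is Gradio's string shorthand for a default gr.Audio output component, which accepts the (sample_rate, int16 array) tuple returned above.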