str20tbl committed
Commit 1d320dc · 1 Parent(s): e41f152
Files changed (1): app.py (+0 -19)
app.py CHANGED
@@ -3,7 +3,6 @@ import spaces
 import gradio as gr
 import librosa
 import numpy as np
-from speechbrain.inference import EncoderClassifier
 import torch
 from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
 
@@ -18,23 +17,6 @@ speaker_embeddings = {
     "BDP": "spkemb/speaker2.npy",
 }
 
-spk_model_name = "speechbrain/spkrec-xvect-voxceleb"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-speaker_model = EncoderClassifier.from_hparams(
-    source=spk_model_name,
-    run_opts={"device": device},
-    savedir=os.path.join("/tmp", spk_model_name),
-)
-
-
-def create_speaker_embedding(waveform):
-    with torch.no_grad():
-        se = speaker_model.encode_batch(torch.tensor(waveform))
-        se = torch.nn.functional.normalize(se, dim=2)
-        se = se.squeeze().cpu().numpy()
-        return se
-
-
 @spaces.GPU
 def predict(text, speaker, audio):
     if len(text.strip()) == 0:
@@ -67,7 +49,6 @@ gr.Interface(
     fn=predict,
     inputs=[
         gr.Text(label="Input Text"),
-        gr.Audio(sources="microphone", type="filepath"),
         gr.Radio(label="Speaker", choices=[
            "GGP (gwryw-gogledd-pro)",
            "BGP (benyw-gogledd-pro)",