str20tbl committed on
Commit
e5cd8de
·
1 Parent(s): 1d320dc
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -18,14 +18,10 @@ speaker_embeddings = {
18
  }
19
 
20
  @spaces.GPU
21
- def predict(text, speaker, audio):
22
  if len(text.strip()) == 0:
23
  return (16000, np.zeros(0).astype(np.int16))
24
- if audio is not None:
25
- speaker_embedding = create_speaker_embedding(audio)
26
- else:
27
- speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
28
-
29
  speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
30
  inputs = processor(text=text, return_tensors="pt")
31
  speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
@@ -55,6 +51,7 @@ gr.Interface(
55
  "BDP (benyw-de-pro)",
56
  ],
57
  value="GGP (gwryw-gogledd-pro)"),
 
58
  ],
59
  outputs=[
60
  gr.Audio(label="Generated Speech", type="numpy"),
 
18
  }
19
 
20
  @spaces.GPU
21
+ def predict(text, speaker, mic_audio=None):
22
  if len(text.strip()) == 0:
23
  return (16000, np.zeros(0).astype(np.int16))
24
+ speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
 
 
 
 
25
  speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
26
  inputs = processor(text=text, return_tensors="pt")
27
  speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
 
51
  "BDP (benyw-de-pro)",
52
  ],
53
  value="GGP (gwryw-gogledd-pro)"),
54
+ gr.Audio(label="Record Speech", source="microphone", type="numpy"),
55
  ],
56
  outputs=[
57
  gr.Audio(label="Generated Speech", type="numpy"),