demo
Browse files
app.py
CHANGED
@@ -17,11 +17,29 @@ speaker_embeddings = {
|
|
17 |
"BDP": "spkemb/speaker2.npy",
|
18 |
}
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
@spaces.GPU
|
21 |
def predict(text, speaker):
|
22 |
if len(text.strip()) == 0:
|
23 |
return (16000, np.zeros(0).astype(np.int16))
|
24 |
speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
|
|
|
25 |
speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
|
26 |
inputs = processor(text=text, return_tensors="pt")
|
27 |
speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
|
|
|
17 |
"BDP": "spkemb/speaker2.npy",
|
18 |
}
|
19 |
|
20 |
+
# Model identifier handed to EncoderClassifier.from_hparams as `source`;
# it is also reused below as the sub-path of the save directory.
spk_model_name = "speechbrain/spkrec-xvect-voxceleb"

# Select CUDA when PyTorch reports it as available, otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f">>>>> DEVICE {device}")

# Build the speaker encoder on the selected device; its files are saved
# under /tmp/<spk_model_name>.
speaker_model = EncoderClassifier.from_hparams(
    savedir=os.path.join("/tmp", spk_model_name),
    run_opts={"device": device},
    source=spk_model_name,
)
|
28 |
+
|
29 |
+
|
30 |
+
def create_speaker_embedding(waveform):
    """Turn a waveform into a normalized speaker embedding.

    The waveform is converted with ``torch.tensor`` and encoded by the
    module-level ``speaker_model`` with gradient tracking disabled. The
    encoder output is normalized along dimension 2, squeezed, moved to
    the CPU and returned as a NumPy array.
    """
    wav_tensor = torch.tensor(waveform)
    with torch.no_grad():
        encoded = speaker_model.encode_batch(wav_tensor)
        unit_norm = torch.nn.functional.normalize(encoded, dim=2)
    return unit_norm.squeeze().cpu().numpy()
|
36 |
+
|
37 |
@spaces.GPU
|
38 |
def predict(text, speaker):
|
39 |
if len(text.strip()) == 0:
|
40 |
return (16000, np.zeros(0).astype(np.int16))
|
41 |
speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
|
42 |
+
speaker_embedding = prepare_dataset(speaker_embedding)
|
43 |
speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
|
44 |
inputs = processor(text=text, return_tensors="pt")
|
45 |
speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
|