demo
Browse files
app.py
CHANGED
@@ -18,14 +18,10 @@ speaker_embeddings = {
|
|
18 |
}
|
19 |
|
20 |
@spaces.GPU
|
21 |
-
def predict(text, speaker,
|
22 |
if len(text.strip()) == 0:
|
23 |
return (16000, np.zeros(0).astype(np.int16))
|
24 |
-
|
25 |
-
speaker_embedding = create_speaker_embedding(audio)
|
26 |
-
else:
|
27 |
-
speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
|
28 |
-
|
29 |
speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
|
30 |
inputs = processor(text=text, return_tensors="pt")
|
31 |
speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
|
@@ -55,6 +51,7 @@ gr.Interface(
|
|
55 |
"BDP (benyw-de-pro)",
|
56 |
],
|
57 |
value="GGP (gwryw-gogledd-pro)"),
|
|
|
58 |
],
|
59 |
outputs=[
|
60 |
gr.Audio(label="Generated Speech", type="numpy"),
|
|
|
18 |
}
|
19 |
|
20 |
@spaces.GPU
|
21 |
+
def predict(text, speaker, mic_audio=None):
|
22 |
if len(text.strip()) == 0:
|
23 |
return (16000, np.zeros(0).astype(np.int16))
|
24 |
+
speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
|
|
|
|
|
|
|
|
|
25 |
speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
|
26 |
inputs = processor(text=text, return_tensors="pt")
|
27 |
speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
|
|
|
51 |
"BDP (benyw-de-pro)",
|
52 |
],
|
53 |
value="GGP (gwryw-gogledd-pro)"),
|
54 |
+
gr.Audio(label="Record Speech", source="microphone", type="numpy"),
|
55 |
],
|
56 |
outputs=[
|
57 |
gr.Audio(label="Generated Speech", type="numpy"),
|