demo
Browse files
app.py
CHANGED
@@ -44,8 +44,8 @@ def predict(text, speaker, audio):
|
|
44 |
speaker_embedding = create_speaker_embedding(audio)
|
45 |
else:
|
46 |
speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
|
47 |
-
speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
|
48 |
|
|
|
49 |
inputs = processor(text=text, return_tensors="pt")
|
50 |
speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
|
51 |
speech = (speech.numpy() * 32767).astype(np.int16)
|
|
|
44 |
speaker_embedding = create_speaker_embedding(audio)
|
45 |
else:
|
46 |
speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
|
|
|
47 |
|
48 |
+
speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
|
49 |
inputs = processor(text=text, return_tensors="pt")
|
50 |
speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
|
51 |
speech = (speech.numpy() * 32767).astype(np.int16)
|