demo
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ import spaces
|
|
3 |
import gradio as gr
|
4 |
import librosa
|
5 |
import numpy as np
|
6 |
-
from speechbrain.inference import EncoderClassifier
|
7 |
import torch
|
8 |
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
|
9 |
|
@@ -18,23 +17,6 @@ speaker_embeddings = {
|
|
18 |
"BDP": "spkemb/speaker2.npy",
|
19 |
}
|
20 |
|
21 |
-
spk_model_name = "speechbrain/spkrec-xvect-voxceleb"
|
22 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
23 |
-
speaker_model = EncoderClassifier.from_hparams(
|
24 |
-
source=spk_model_name,
|
25 |
-
run_opts={"device": device},
|
26 |
-
savedir=os.path.join("/tmp", spk_model_name),
|
27 |
-
)
|
28 |
-
|
29 |
-
|
30 |
-
def create_speaker_embedding(waveform):
|
31 |
-
with torch.no_grad():
|
32 |
-
se = speaker_model.encode_batch(torch.tensor(waveform))
|
33 |
-
se = torch.nn.functional.normalize(se, dim=2)
|
34 |
-
se = se.squeeze().cpu().numpy()
|
35 |
-
return se
|
36 |
-
|
37 |
-
|
38 |
@spaces.GPU
|
39 |
def predict(text, speaker, audio):
|
40 |
if len(text.strip()) == 0:
|
@@ -67,7 +49,6 @@ gr.Interface(
|
|
67 |
fn=predict,
|
68 |
inputs=[
|
69 |
gr.Text(label="Input Text"),
|
70 |
-
gr.Audio(sources="microphone", type="filepath"),
|
71 |
gr.Radio(label="Speaker", choices=[
|
72 |
"GGP (gwryw-gogledd-pro)",
|
73 |
"BGP (benyw-gogledd-pro)",
|
|
|
3 |
import gradio as gr
|
4 |
import librosa
|
5 |
import numpy as np
|
|
|
6 |
import torch
|
7 |
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
|
8 |
|
|
|
17 |
"BDP": "spkemb/speaker2.npy",
|
18 |
}
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
@spaces.GPU
|
21 |
def predict(text, speaker, audio):
|
22 |
if len(text.strip()) == 0:
|
|
|
49 |
fn=predict,
|
50 |
inputs=[
|
51 |
gr.Text(label="Input Text"),
|
|
|
52 |
gr.Radio(label="Speaker", choices=[
|
53 |
"GGP (gwryw-gogledd-pro)",
|
54 |
"BGP (benyw-gogledd-pro)",
|