Spaces:
Build error
Build error
lithiumice
committed on
Commit
·
f68fadb
1
Parent(s):
a31c0b9
add ht
Browse files- app.py +30 -4
- checkpoints/BFM_Fitting/01_MorphableModel.mat +0 -1
- checkpoints/BFM_Fitting/BFM09_model_info.mat +0 -1
- checkpoints/BFM_Fitting/BFM_exp_idx.mat +0 -1
- checkpoints/BFM_Fitting/BFM_front_idx.mat +0 -1
- checkpoints/BFM_Fitting/Exp_Pca.bin +0 -1
- checkpoints/BFM_Fitting/facemodel_info.mat +0 -1
- checkpoints/BFM_Fitting/select_vertex_id.mat +0 -1
- checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +0 -1
- checkpoints/BFM_Fitting/std_exp.txt +0 -1
- checkpoints/auido2exp_00300-model.pth +0 -1
- checkpoints/auido2pose_00140-model.pth +0 -1
- checkpoints/epoch_20.pth +0 -1
- checkpoints/facevid2vid_00189-model.pth.tar +0 -1
- checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +0 -1
- checkpoints/hub/checkpoints/s3fd-619a316812.pth +0 -1
- checkpoints/mapping_00229-model.pth.tar +0 -1
- checkpoints/shape_predictor_68_face_landmarks.dat +0 -1
- checkpoints/wav2lip.pth +0 -1
- req.txt +23 -0
- run.sh +1 -0
- src/gradio_demo.py +3 -2
- src/utils/text2speech.py +114 -1
app.py
CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
|
|
4 |
from src.gradio_demo import SadTalker
|
5 |
from src.utils.text2speech import TTSTalker
|
6 |
from huggingface_hub import snapshot_download
|
|
|
7 |
|
8 |
def get_source_image(image):
|
9 |
return image
|
@@ -18,6 +19,7 @@ def sadtalker_demo():
|
|
18 |
|
19 |
sad_talker = SadTalker(lazy_load=True)
|
20 |
tts_talker = TTSTalker()
|
|
|
21 |
|
22 |
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
|
23 |
gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
|
@@ -38,16 +40,40 @@ def sadtalker_demo():
|
|
38 |
with gr.Row():
|
39 |
source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
42 |
-
with gr.TabItem('Upload
|
43 |
with gr.Column(variant='panel'):
|
44 |
-
driven_audio = gr.Audio(label="Input audio
|
45 |
|
46 |
with gr.Column(variant='panel'):
|
47 |
-
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="
|
48 |
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
49 |
-
tts.click(fn=
|
|
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
with gr.Column(variant='panel'):
|
53 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
|
|
4 |
from src.gradio_demo import SadTalker
|
5 |
from src.utils.text2speech import TTSTalker
|
6 |
from huggingface_hub import snapshot_download
|
7 |
+
from src.utils.text2speech import TTSTalkerPlayHT
|
8 |
|
9 |
def get_source_image(image):
|
10 |
return image
|
|
|
19 |
|
20 |
sad_talker = SadTalker(lazy_load=True)
|
21 |
tts_talker = TTSTalker()
|
22 |
+
tts_talker_ht = TTSTalkerPlayHT()
|
23 |
|
24 |
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
|
25 |
gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
|
|
|
40 |
with gr.Row():
|
41 |
source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
|
42 |
|
43 |
+
# with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
44 |
+
# with gr.TabItem('Upload or Generating from TTS'):
|
45 |
+
# with gr.Column(variant='panel'):
|
46 |
+
# driven_audio = gr.Audio(label="Input audio(.wav/.mp3)", source="upload", type="filepath")
|
47 |
+
|
48 |
+
# with gr.Column(variant='panel'):
|
49 |
+
# input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
|
50 |
+
# tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
51 |
+
# tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
|
52 |
+
|
53 |
+
|
54 |
+
# ht TTS
|
55 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
56 |
+
with gr.TabItem('Play.ht: Upload OR TTS'):
|
57 |
with gr.Column(variant='panel'):
|
58 |
+
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
59 |
|
60 |
with gr.Column(variant='panel'):
|
61 |
+
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
|
62 |
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
63 |
+
tts.click(fn=tts_talker_ht.test, inputs=[input_text], outputs=[driven_audio])
|
64 |
+
|
65 |
|
66 |
+
# origin TTS
|
67 |
+
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
68 |
+
with gr.TabItem('Origin: Upload OR TTS'):
|
69 |
+
with gr.Column(variant='panel'):
|
70 |
+
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
71 |
+
|
72 |
+
with gr.Column(variant='panel'):
|
73 |
+
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
|
74 |
+
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
75 |
+
tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
|
76 |
+
|
77 |
|
78 |
with gr.Column(variant='panel'):
|
79 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
checkpoints/BFM_Fitting/01_MorphableModel.mat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
|
|
|
|
checkpoints/BFM_Fitting/BFM09_model_info.mat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
|
|
|
|
checkpoints/BFM_Fitting/BFM_exp_idx.mat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/1146e4e9c3bef303a497383aa7974c014fe945c7
|
|
|
|
checkpoints/BFM_Fitting/BFM_front_idx.mat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b9d7b0953dd1dc5b1e28144610485409ac321f9b
|
|
|
|
checkpoints/BFM_Fitting/Exp_Pca.bin
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
|
|
|
|
checkpoints/BFM_Fitting/facemodel_info.mat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/3e516ec7297fa3248098f49ecea10579f4831c0a
|
|
|
|
checkpoints/BFM_Fitting/select_vertex_id.mat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/5b8b220093d93b133acc94ffed159f31a74854cd
|
|
|
|
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/a0e23588302bc71fc899eef53ff06df5f4df4c1d
|
|
|
|
checkpoints/BFM_Fitting/std_exp.txt
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/767b8de4ea1ca78b6f22b98ff2dee4fa345500bb
|
|
|
|
checkpoints/auido2exp_00300-model.pth
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
|
|
|
|
checkpoints/auido2pose_00140-model.pth
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
|
|
|
|
checkpoints/epoch_20.pth
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
|
|
|
|
checkpoints/facevid2vid_00189-model.pth.tar
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
|
|
|
|
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
|
|
|
|
checkpoints/hub/checkpoints/s3fd-619a316812.pth
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
|
|
|
|
checkpoints/mapping_00229-model.pth.tar
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
|
|
|
|
checkpoints/shape_predictor_68_face_landmarks.dat
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
|
|
|
|
checkpoints/wav2lip.pth
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
|
|
|
|
req.txt
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
torchvision
|
3 |
+
torchaudio
|
4 |
+
numpy
|
5 |
+
face_alignment
|
6 |
+
imageio
|
7 |
+
imageio-ffmpeg
|
8 |
+
librosa
|
9 |
+
numba
|
10 |
+
resampy
|
11 |
+
pydub
|
12 |
+
scipy
|
13 |
+
kornia
|
14 |
+
tqdm
|
15 |
+
yacs
|
16 |
+
pyyaml
|
17 |
+
joblib
|
18 |
+
scikit-image
|
19 |
+
basicsr
|
20 |
+
facexlib
|
21 |
+
dlib-bin
|
22 |
+
gfpgan
|
23 |
+
TTS
|
run.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python app.py
|
src/gradio_demo.py
CHANGED
@@ -127,8 +127,9 @@ class SadTalker():
|
|
127 |
del self.audio_to_coeff
|
128 |
del self.animate_from_coeff
|
129 |
|
130 |
-
torch.cuda.
|
131 |
-
|
|
|
132 |
import gc; gc.collect()
|
133 |
|
134 |
return return_path
|
|
|
127 |
del self.audio_to_coeff
|
128 |
del self.animate_from_coeff
|
129 |
|
130 |
+
if torch.cuda.is_available() :
|
131 |
+
torch.cuda.empty_cache()
|
132 |
+
torch.cuda.synchronize()
|
133 |
import gc; gc.collect()
|
134 |
|
135 |
return return_path
|
src/utils/text2speech.py
CHANGED
@@ -18,4 +18,117 @@ class TTSTalker():
|
|
18 |
|
19 |
self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name)
|
20 |
|
21 |
-
return tempf.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name)
|
20 |
|
21 |
+
return tempf.name
|
22 |
+
|
23 |
+
import urllib.request
|
24 |
+
import tempfile
|
25 |
+
import requests
|
26 |
+
import json
|
27 |
+
import time
|
28 |
+
|
29 |
+
|
30 |
+
class TTSTalkerPlayHT():
    """Text-to-speech helper backed by the Play.ht v1 REST API.

    The flow is: POST the text to ``/convert`` to obtain a transcription id,
    poll ``/articleStatus`` until the conversion is reported as finished,
    then download the resulting audio into a temporary ``.mp3`` file.
    """

    # Overall time budget for the conversion to finish, in seconds.
    POLL_WINDOW_S = 10
    # Pause between two status polls, in seconds.
    POLL_INTERVAL_S = 0.5
    # Per-request network timeout so a stalled connection cannot hang the UI.
    REQUEST_TIMEOUT_S = 10

    def __init__(self) -> None:
        """Set up the API base URL and the authentication headers.

        Credentials are read from the ``PLAYHT_API_KEY`` and
        ``PLAYHT_USER_ID`` environment variables when they are set.
        """
        import os

        self.url = "https://play.ht/api/v1"
        # NOTE(security): the fallback literals below are credentials that
        # were committed to the repository. They are kept only so existing
        # deployments keep working; rotate them and supply the values through
        # the environment variables instead.
        self.headers = {
            'Authorization': os.environ.get(
                'PLAYHT_API_KEY', 'f35fc9d7ce0549a88f6cdc15ec860b6e'),
            'X-User-ID': os.environ.get(
                'PLAYHT_USER_ID', '96tPb0H2cXbobV9u8iLVGyJPUPc2'),
            'Content-Type': 'application/json'
        }

    def test(self, text, language='en', **kwargs):
        """Convert ``text`` to speech and return the path of an ``.mp3`` file.

        Args:
            text: The text to synthesize.
            language: Accepted for signature compatibility with the other
                talker class; the voice is currently fixed, so it is unused.
            **kwargs: Ignored.

        Returns:
            The path of a temporary ``.mp3`` file, or ``None`` when the
            service reports an error, the request fails, or the conversion
            does not finish within ``POLL_WINDOW_S`` seconds.
        """
        try:
            transcription_id = self._start_conversion(text)
            if transcription_id is None:
                return None

            audio_url = self._wait_for_audio_url(transcription_id)
            if audio_url is None:
                return None

            return self._download(audio_url)
        except (requests.RequestException, json.JSONDecodeError) as err:
            # Best effort: report the failure and hand back "no audio"
            # instead of crashing the caller.
            print(f'Play.ht request failed: {err}')
            return None

    @staticmethod
    def _failed(response):
        """Report a non-success HTTP response; return True if it failed."""
        if response.ok:
            return False
        print(response.text)
        print(response.status_code)
        return True

    def _start_conversion(self, text):
        """Submit ``text`` for conversion and return its transcription id."""
        payload = json.dumps({
            "title": "Testing public api convertion",
            "voice": "en-US-MichelleNeural",
            "content": [text],
        })
        response = requests.post(
            self.url + '/convert',
            headers=self.headers,
            data=payload,
            timeout=self.REQUEST_TIMEOUT_S,
        )
        if self._failed(response):
            return None

        # On success the transcriptionId is returned immediately.
        transcription_id = json.loads(response.text).get('transcriptionId')
        if transcription_id is None:
            print('Play.ht response did not contain a transcriptionId')
        return transcription_id

    def _wait_for_audio_url(self, transcription_id):
        """Poll the conversion status until an audio URL is available."""
        deadline = time.monotonic() + self.POLL_WINDOW_S
        while time.monotonic() < deadline:
            response = requests.get(
                self.url + '/articleStatus',
                params={'transcriptionId': transcription_id},
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT_S,
            )
            if self._failed(response):
                return None

            # The articleStatus response does not necessarily carry the
            # audioUrl right away; keep polling until it is converted.
            data = json.loads(response.text)
            if data.get('converted') and data.get('audioUrl'):
                return data['audioUrl']
            time.sleep(self.POLL_INTERVAL_S)

        print('Play.ht conversion did not finish in time')
        return None

    def _download(self, url, chunk_size=1024):
        """Stream ``url`` into a new temporary ``.mp3`` file; return its path."""
        import os

        # Some hosts reject the default client user agent, so mimic wget.
        headers = {'user-agent': 'Wget/1.16 (linux-gnu)'}
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tempf:
            path = tempf.name
            try:
                with requests.get(
                    url,
                    stream=True,
                    headers=headers,
                    timeout=self.REQUEST_TIMEOUT_S,
                ) as r:
                    r.raise_for_status()
                    for chunk in r.iter_content(chunk_size=chunk_size):
                        if chunk:
                            tempf.write(chunk)
            except requests.RequestException:
                # Do not leave a partial or empty file behind on failure.
                tempf.close()
                os.unlink(path)
                raise
        return path
|
134 |
+
|