Spaces:
Build error
Build error
lithiumice
committed on
Commit
·
36aab19
1
Parent(s):
0dbf11e
add ht api
Browse files
- .gitignore +2 -1
- app.py +23 -6
- requirements.txt +3 -1
- src/gradio_demo.py +1 -1
- src/utils/text2speech.py +20 -6
.gitignore
CHANGED
@@ -153,7 +153,8 @@ dmypy.json
|
|
153 |
cython_debug/
|
154 |
|
155 |
results/
|
156 |
-
|
|
|
157 |
gradio_cached_examples/
|
158 |
gfpgan/
|
159 |
start.sh
|
|
|
153 |
cython_debug/
|
154 |
|
155 |
results/
|
156 |
+
checkpoints/
|
157 |
+
checkpoints_win/
|
158 |
gradio_cached_examples/
|
159 |
gfpgan/
|
160 |
start.sh
|
app.py
CHANGED
@@ -15,9 +15,17 @@ def download_model():
|
|
15 |
|
16 |
def sadtalker_demo():
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
tts_talker = TTSTalker()
|
22 |
tts_talker_ht = TTSTalkerPlayHT()
|
23 |
|
@@ -55,12 +63,16 @@ def sadtalker_demo():
|
|
55 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
56 |
with gr.TabItem('Play.ht: Upload OR TTS'):
|
57 |
with gr.Column(variant='panel'):
|
|
|
58 |
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
|
|
|
|
|
|
59 |
|
60 |
with gr.Column(variant='panel'):
|
61 |
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
|
62 |
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
63 |
-
tts.click(fn=tts_talker_ht.test, inputs=[input_text], outputs=[driven_audio])
|
64 |
|
65 |
|
66 |
# origin TTS
|
@@ -159,10 +171,15 @@ def sadtalker_demo():
|
|
159 |
driven_audio,
|
160 |
preprocess_type,
|
161 |
is_still_mode,
|
162 |
-
enhancer
|
|
|
|
|
|
|
163 |
outputs=[gen_video],
|
164 |
fn=sad_talker.test,
|
165 |
-
cache_examples=
|
|
|
|
|
166 |
|
167 |
submit.click(
|
168 |
fn=sad_talker.test,
|
|
|
15 |
|
16 |
def sadtalker_demo():
|
17 |
|
18 |
+
# 这部分的逻辑是在我的笔记本上本地运行和在服务器上运行的时候,模型的路径不一样,所以需要做一下判断
|
19 |
+
import platform
|
20 |
+
if platform.system() != 'Windows':
|
21 |
+
download_model()
|
22 |
+
|
23 |
+
sad_talker = SadTalker(
|
24 |
+
lazy_load=True,
|
25 |
+
checkpoint_path='./checkpoints' if platform.system() == 'Linux' else 'checkpoints_win',
|
26 |
+
)
|
27 |
+
|
28 |
+
|
29 |
tts_talker = TTSTalker()
|
30 |
tts_talker_ht = TTSTalkerPlayHT()
|
31 |
|
|
|
63 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
64 |
with gr.TabItem('Play.ht: Upload OR TTS'):
|
65 |
with gr.Column(variant='panel'):
|
66 |
+
gr.Markdown("find more info here: https://playht.github.io/api-docs-generator/#standard-api-voices")
|
67 |
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
68 |
+
ht_user_id = gr.Textbox(label="Play.ht User ID", lines=1, placeholder="(required)", value='96tPb0H2cXbobV9u8iLVGyJPUPc2')
|
69 |
+
ht_auth_key = gr.Textbox(label="Play.ht Auth Key", lines=1, placeholder="(required)",value='f35fc9d7ce0549a88f6cdc15ec860b6e')
|
70 |
+
ht_voice = gr.Textbox(label="Play.ht Voice Type, see <Standard API Voices> section in API doc", lines=1, placeholder="(required)",value='en-US-MichelleNeural')
|
71 |
|
72 |
with gr.Column(variant='panel'):
|
73 |
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
|
74 |
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
75 |
+
tts.click(fn=tts_talker_ht.test, inputs=[input_text, ht_user_id, ht_auth_key,ht_voice], outputs=[driven_audio])
|
76 |
|
77 |
|
78 |
# origin TTS
|
|
|
171 |
driven_audio,
|
172 |
preprocess_type,
|
173 |
is_still_mode,
|
174 |
+
enhancer,
|
175 |
+
# ht_user_id,
|
176 |
+
# ht_auth_key
|
177 |
+
],
|
178 |
outputs=[gen_video],
|
179 |
fn=sad_talker.test,
|
180 |
+
cache_examples=False
|
181 |
+
# cache_examples=os.getenv('SYSTEM') == 'spaces'
|
182 |
+
) #
|
183 |
|
184 |
submit.click(
|
185 |
fn=sad_talker.test,
|
requirements.txt
CHANGED
@@ -20,4 +20,6 @@ basicsr==1.4.2
|
|
20 |
facexlib==0.2.5
|
21 |
dlib-bin
|
22 |
gfpgan
|
23 |
-
TTS==0.13.0
|
|
|
|
|
|
20 |
facexlib==0.2.5
|
21 |
dlib-bin
|
22 |
gfpgan
|
23 |
+
TTS==0.13.0
|
24 |
+
requests
|
25 |
+
# tempfile
|
src/gradio_demo.py
CHANGED
@@ -127,7 +127,7 @@ class SadTalker():
|
|
127 |
del self.audio_to_coeff
|
128 |
del self.animate_from_coeff
|
129 |
|
130 |
-
if torch.cuda.is_available()
|
131 |
torch.cuda.empty_cache()
|
132 |
torch.cuda.synchronize()
|
133 |
import gc; gc.collect()
|
|
|
127 |
del self.audio_to_coeff
|
128 |
del self.animate_from_coeff
|
129 |
|
130 |
+
if torch.cuda.is_available():
|
131 |
torch.cuda.empty_cache()
|
132 |
torch.cuda.synchronize()
|
133 |
import gc; gc.collect()
|
src/utils/text2speech.py
CHANGED
@@ -20,7 +20,6 @@ class TTSTalker():
|
|
20 |
|
21 |
return tempf.name
|
22 |
|
23 |
-
import urllib.request
|
24 |
import tempfile
|
25 |
import requests
|
26 |
import json
|
@@ -36,16 +35,26 @@ class TTSTalkerPlayHT():
|
|
36 |
text = 'hello world'
|
37 |
|
38 |
self.url = "https://play.ht/api/v1"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
self.headers = {
|
40 |
-
'Authorization':
|
41 |
-
'X-User-ID':
|
42 |
'Content-Type': 'application/json'
|
43 |
}
|
44 |
|
45 |
-
def test(self, text, language='en', **kwargs):
|
46 |
payload = json.dumps({
|
47 |
"title": "Testing public api convertion",
|
48 |
-
"voice":
|
49 |
"content": [text],
|
50 |
})
|
51 |
get_url = self.url+f'/convert'
|
@@ -55,6 +64,8 @@ class TTSTalkerPlayHT():
|
|
55 |
headers=self.headers,
|
56 |
data=payload)
|
57 |
|
|
|
|
|
58 |
if response.status_code == 404:
|
59 |
print('404')
|
60 |
return
|
@@ -84,6 +95,8 @@ class TTSTalkerPlayHT():
|
|
84 |
headers=self.headers,
|
85 |
)
|
86 |
|
|
|
|
|
87 |
if response.status_code == 404:
|
88 |
print(response.text)
|
89 |
print('404')
|
@@ -93,7 +106,7 @@ class TTSTalkerPlayHT():
|
|
93 |
data = json.loads(response.text)
|
94 |
converted = data['converted']
|
95 |
if converted != True:
|
96 |
-
time.sleep(0
|
97 |
continue
|
98 |
|
99 |
# articleStatus 表示转换完成
|
@@ -119,6 +132,7 @@ class TTSTalkerPlayHT():
|
|
119 |
|
120 |
download_dropbox_url(audioUrl, tempf.name)
|
121 |
|
|
|
122 |
# urllib.request.urlretrieve(audioUrl, tempf.name)
|
123 |
|
124 |
# response = requests.get(audioUrl)
|
|
|
20 |
|
21 |
return tempf.name
|
22 |
|
|
|
23 |
import tempfile
|
24 |
import requests
|
25 |
import json
|
|
|
35 |
text = 'hello world'
|
36 |
|
37 |
self.url = "https://play.ht/api/v1"
|
38 |
+
|
39 |
+
|
40 |
+
def test(self, text,
|
41 |
+
ht_user_id = '96tPb0H2cXbobV9u8iLVGyJPUPc2',
|
42 |
+
ht_auth_key = 'f35fc9d7ce0549a88f6cdc15ec860b6e',
|
43 |
+
voice = 'en-US-MichelleNeural',
|
44 |
+
**kwargs):
|
45 |
+
|
46 |
+
print(f'ht_user_id {ht_user_id}')
|
47 |
+
print(f'ht_auth_key {ht_auth_key}')
|
48 |
+
|
49 |
self.headers = {
|
50 |
+
'Authorization': ht_auth_key,
|
51 |
+
'X-User-ID': ht_user_id,
|
52 |
'Content-Type': 'application/json'
|
53 |
}
|
54 |
|
|
|
55 |
payload = json.dumps({
|
56 |
"title": "Testing public api convertion",
|
57 |
+
"voice": voice,
|
58 |
"content": [text],
|
59 |
})
|
60 |
get_url = self.url+f'/convert'
|
|
|
64 |
headers=self.headers,
|
65 |
data=payload)
|
66 |
|
67 |
+
print(f'convert response.status_code {response.status_code}')
|
68 |
+
|
69 |
if response.status_code == 404:
|
70 |
print('404')
|
71 |
return
|
|
|
95 |
headers=self.headers,
|
96 |
)
|
97 |
|
98 |
+
print(f'articleStatus response.status_code {response.status_code}')
|
99 |
+
|
100 |
if response.status_code == 404:
|
101 |
print(response.text)
|
102 |
print('404')
|
|
|
106 |
data = json.loads(response.text)
|
107 |
converted = data['converted']
|
108 |
if converted != True:
|
109 |
+
time.sleep(1.0)
|
110 |
continue
|
111 |
|
112 |
# articleStatus 表示转换完成
|
|
|
132 |
|
133 |
download_dropbox_url(audioUrl, tempf.name)
|
134 |
|
135 |
+
# import urllib.request
|
136 |
# urllib.request.urlretrieve(audioUrl, tempf.name)
|
137 |
|
138 |
# response = requests.get(audioUrl)
|