Spaces:
Build error
Build error
lithiumice
committed on
Commit
·
36aab19
1
Parent(s):
0dbf11e
add ht api
Browse files- .gitignore +2 -1
- app.py +23 -6
- requirements.txt +3 -1
- src/gradio_demo.py +1 -1
- src/utils/text2speech.py +20 -6
.gitignore
CHANGED
|
@@ -153,7 +153,8 @@ dmypy.json
|
|
| 153 |
cython_debug/
|
| 154 |
|
| 155 |
results/
|
| 156 |
-
|
|
|
|
| 157 |
gradio_cached_examples/
|
| 158 |
gfpgan/
|
| 159 |
start.sh
|
|
|
|
| 153 |
cython_debug/
|
| 154 |
|
| 155 |
results/
|
| 156 |
+
checkpoints/
|
| 157 |
+
checkpoints_win/
|
| 158 |
gradio_cached_examples/
|
| 159 |
gfpgan/
|
| 160 |
start.sh
|
app.py
CHANGED
|
@@ -15,9 +15,17 @@ def download_model():
|
|
| 15 |
|
| 16 |
def sadtalker_demo():
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
tts_talker = TTSTalker()
|
| 22 |
tts_talker_ht = TTSTalkerPlayHT()
|
| 23 |
|
|
@@ -55,12 +63,16 @@ def sadtalker_demo():
|
|
| 55 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
| 56 |
with gr.TabItem('Play.ht: Upload OR TTS'):
|
| 57 |
with gr.Column(variant='panel'):
|
|
|
|
| 58 |
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
with gr.Column(variant='panel'):
|
| 61 |
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
|
| 62 |
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
| 63 |
-
tts.click(fn=tts_talker_ht.test, inputs=[input_text], outputs=[driven_audio])
|
| 64 |
|
| 65 |
|
| 66 |
# origin TTS
|
|
@@ -159,10 +171,15 @@ def sadtalker_demo():
|
|
| 159 |
driven_audio,
|
| 160 |
preprocess_type,
|
| 161 |
is_still_mode,
|
| 162 |
-
enhancer
|
|
|
|
|
|
|
|
|
|
| 163 |
outputs=[gen_video],
|
| 164 |
fn=sad_talker.test,
|
| 165 |
-
cache_examples=
|
|
|
|
|
|
|
| 166 |
|
| 167 |
submit.click(
|
| 168 |
fn=sad_talker.test,
|
|
|
|
| 15 |
|
| 16 |
def sadtalker_demo():
|
| 17 |
|
| 18 |
+
# The model path differs between running locally on my laptop and running on the server, so we need to check which platform we are on
|
| 19 |
+
import platform
|
| 20 |
+
if platform.system() != 'Windows':
|
| 21 |
+
download_model()
|
| 22 |
+
|
| 23 |
+
sad_talker = SadTalker(
|
| 24 |
+
lazy_load=True,
|
| 25 |
+
checkpoint_path='./checkpoints' if platform.system() == 'Linux' else 'checkpoints_win',
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
tts_talker = TTSTalker()
|
| 30 |
tts_talker_ht = TTSTalkerPlayHT()
|
| 31 |
|
|
|
|
| 63 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
| 64 |
with gr.TabItem('Play.ht: Upload OR TTS'):
|
| 65 |
with gr.Column(variant='panel'):
|
| 66 |
+
gr.Markdown("find more info here: https://playht.github.io/api-docs-generator/#standard-api-voices")
|
| 67 |
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
| 68 |
+
ht_user_id = gr.Textbox(label="Play.ht User ID", lines=1, placeholder="(required)", value='96tPb0H2cXbobV9u8iLVGyJPUPc2')
|
| 69 |
+
ht_auth_key = gr.Textbox(label="Play.ht Auth Key", lines=1, placeholder="(required)",value='f35fc9d7ce0549a88f6cdc15ec860b6e')
|
| 70 |
+
ht_voice = gr.Textbox(label="Play.ht Voice Type, see <Standard API Voices> section in API doc", lines=1, placeholder="(required)",value='en-US-MichelleNeural')
|
| 71 |
|
| 72 |
with gr.Column(variant='panel'):
|
| 73 |
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
|
| 74 |
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
| 75 |
+
tts.click(fn=tts_talker_ht.test, inputs=[input_text, ht_user_id, ht_auth_key,ht_voice], outputs=[driven_audio])
|
| 76 |
|
| 77 |
|
| 78 |
# origin TTS
|
|
|
|
| 171 |
driven_audio,
|
| 172 |
preprocess_type,
|
| 173 |
is_still_mode,
|
| 174 |
+
enhancer,
|
| 175 |
+
# ht_user_id,
|
| 176 |
+
# ht_auth_key
|
| 177 |
+
],
|
| 178 |
outputs=[gen_video],
|
| 179 |
fn=sad_talker.test,
|
| 180 |
+
cache_examples=False
|
| 181 |
+
# cache_examples=os.getenv('SYSTEM') == 'spaces'
|
| 182 |
+
) #
|
| 183 |
|
| 184 |
submit.click(
|
| 185 |
fn=sad_talker.test,
|
requirements.txt
CHANGED
|
@@ -20,4 +20,6 @@ basicsr==1.4.2
|
|
| 20 |
facexlib==0.2.5
|
| 21 |
dlib-bin
|
| 22 |
gfpgan
|
| 23 |
-
TTS==0.13.0
|
|
|
|
|
|
|
|
|
| 20 |
facexlib==0.2.5
|
| 21 |
dlib-bin
|
| 22 |
gfpgan
|
| 23 |
+
TTS==0.13.0
|
| 24 |
+
requests
|
| 25 |
+
# tempfile
|
src/gradio_demo.py
CHANGED
|
@@ -127,7 +127,7 @@ class SadTalker():
|
|
| 127 |
del self.audio_to_coeff
|
| 128 |
del self.animate_from_coeff
|
| 129 |
|
| 130 |
-
if torch.cuda.is_available()
|
| 131 |
torch.cuda.empty_cache()
|
| 132 |
torch.cuda.synchronize()
|
| 133 |
import gc; gc.collect()
|
|
|
|
| 127 |
del self.audio_to_coeff
|
| 128 |
del self.animate_from_coeff
|
| 129 |
|
| 130 |
+
if torch.cuda.is_available():
|
| 131 |
torch.cuda.empty_cache()
|
| 132 |
torch.cuda.synchronize()
|
| 133 |
import gc; gc.collect()
|
src/utils/text2speech.py
CHANGED
|
@@ -20,7 +20,6 @@ class TTSTalker():
|
|
| 20 |
|
| 21 |
return tempf.name
|
| 22 |
|
| 23 |
-
import urllib.request
|
| 24 |
import tempfile
|
| 25 |
import requests
|
| 26 |
import json
|
|
@@ -36,16 +35,26 @@ class TTSTalkerPlayHT():
|
|
| 36 |
text = 'hello world'
|
| 37 |
|
| 38 |
self.url = "https://play.ht/api/v1"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
self.headers = {
|
| 40 |
-
'Authorization':
|
| 41 |
-
'X-User-ID':
|
| 42 |
'Content-Type': 'application/json'
|
| 43 |
}
|
| 44 |
|
| 45 |
-
def test(self, text, language='en', **kwargs):
|
| 46 |
payload = json.dumps({
|
| 47 |
"title": "Testing public api convertion",
|
| 48 |
-
"voice":
|
| 49 |
"content": [text],
|
| 50 |
})
|
| 51 |
get_url = self.url+f'/convert'
|
|
@@ -55,6 +64,8 @@ class TTSTalkerPlayHT():
|
|
| 55 |
headers=self.headers,
|
| 56 |
data=payload)
|
| 57 |
|
|
|
|
|
|
|
| 58 |
if response.status_code == 404:
|
| 59 |
print('404')
|
| 60 |
return
|
|
@@ -84,6 +95,8 @@ class TTSTalkerPlayHT():
|
|
| 84 |
headers=self.headers,
|
| 85 |
)
|
| 86 |
|
|
|
|
|
|
|
| 87 |
if response.status_code == 404:
|
| 88 |
print(response.text)
|
| 89 |
print('404')
|
|
@@ -93,7 +106,7 @@ class TTSTalkerPlayHT():
|
|
| 93 |
data = json.loads(response.text)
|
| 94 |
converted = data['converted']
|
| 95 |
if converted != True:
|
| 96 |
-
time.sleep(0
|
| 97 |
continue
|
| 98 |
|
| 99 |
# articleStatus 表示转换完成
|
|
@@ -119,6 +132,7 @@ class TTSTalkerPlayHT():
|
|
| 119 |
|
| 120 |
download_dropbox_url(audioUrl, tempf.name)
|
| 121 |
|
|
|
|
| 122 |
# urllib.request.urlretrieve(audioUrl, tempf.name)
|
| 123 |
|
| 124 |
# response = requests.get(audioUrl)
|
|
|
|
| 20 |
|
| 21 |
return tempf.name
|
| 22 |
|
|
|
|
| 23 |
import tempfile
|
| 24 |
import requests
|
| 25 |
import json
|
|
|
|
| 35 |
text = 'hello world'
|
| 36 |
|
| 37 |
self.url = "https://play.ht/api/v1"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def test(self, text,
|
| 41 |
+
ht_user_id = '96tPb0H2cXbobV9u8iLVGyJPUPc2',
|
| 42 |
+
ht_auth_key = 'f35fc9d7ce0549a88f6cdc15ec860b6e',
|
| 43 |
+
voice = 'en-US-MichelleNeural',
|
| 44 |
+
**kwargs):
|
| 45 |
+
|
| 46 |
+
print(f'ht_user_id {ht_user_id}')
|
| 47 |
+
print(f'ht_auth_key {ht_auth_key}')
|
| 48 |
+
|
| 49 |
self.headers = {
|
| 50 |
+
'Authorization': ht_auth_key,
|
| 51 |
+
'X-User-ID': ht_user_id,
|
| 52 |
'Content-Type': 'application/json'
|
| 53 |
}
|
| 54 |
|
|
|
|
| 55 |
payload = json.dumps({
|
| 56 |
"title": "Testing public api convertion",
|
| 57 |
+
"voice": voice,
|
| 58 |
"content": [text],
|
| 59 |
})
|
| 60 |
get_url = self.url+f'/convert'
|
|
|
|
| 64 |
headers=self.headers,
|
| 65 |
data=payload)
|
| 66 |
|
| 67 |
+
print(f'convert response.status_code {response.status_code}')
|
| 68 |
+
|
| 69 |
if response.status_code == 404:
|
| 70 |
print('404')
|
| 71 |
return
|
|
|
|
| 95 |
headers=self.headers,
|
| 96 |
)
|
| 97 |
|
| 98 |
+
print(f'articleStatus response.status_code {response.status_code}')
|
| 99 |
+
|
| 100 |
if response.status_code == 404:
|
| 101 |
print(response.text)
|
| 102 |
print('404')
|
|
|
|
| 106 |
data = json.loads(response.text)
|
| 107 |
converted = data['converted']
|
| 108 |
if converted != True:
|
| 109 |
+
time.sleep(1.0)
|
| 110 |
continue
|
| 111 |
|
| 112 |
# articleStatus 表示转换完成
|
|
|
|
| 132 |
|
| 133 |
download_dropbox_url(audioUrl, tempf.name)
|
| 134 |
|
| 135 |
+
# import urllib.request
|
| 136 |
# urllib.request.urlretrieve(audioUrl, tempf.name)
|
| 137 |
|
| 138 |
# response = requests.get(audioUrl)
|