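# GPT Talking Portrait: record a spoken question, have ChatGPT answer it,
# and render the answer as a talking-head video. The app chains three
# Hugging Face Spaces: whisper-to-chatGPT (speech -> text answer),
# IMS-Toucan (text -> speech), and one-shot-talking-face (speech + portrait -> video).
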
import gradio as gr
#import requests
from PIL import Image  # currently unused: only referenced by the commented-out Image.open below
import os

# Hugging Face API token, used to authenticate against the talking-face Space
token = os.environ.get('HF_TOKEN')
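
# Load the three upstream Spaces as callable proxies (Gradio 3.x
# gr.Blocks.load / gr.Interface.load API); each call below invokes the
# remote endpoint selected by fn_index.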
whisper_to_gpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
tts = gr.Interface.load(name="spaces/Flux9665/IMS-Toucan")
talking_face = gr.Blocks.load(name="spaces/fffiloni/one-shot-talking-face", api_key=token)
def infer(audio):
    # 1. Transcribe the recorded question and get ChatGPT's text answer
    gpt_response = whisper_to_gpt(audio, "translate", fn_index=0)
    #print(gpt_response)
    # 2. Synthesize the answer to speech with IMS-Toucan
    audio_response = tts(gpt_response[1], "English Text", "English Accent", "English Speaker's Voice", fn_index=0)
    #image = Image.open(r"wise_woman_portrait.png")
    # 3. Animate the portrait image with the synthesized speech
    portrait_link = talking_face("wise_woman_portrait.png", audio_response, fn_index=0)
    #portrait_response = requests.get(portrait_link, headers={'Authorization': 'Bearer ' + token})
    #print(portrait_response.text)
    return portrait_link
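
# A minimal local sanity check (hypothetical file name; assumes HF_TOKEN is set
# and the upstream Spaces are reachable):
#   video = infer("question.wav")  # talking-head video produced by the portrait Space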
title = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
    <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
            margin-bottom: 10px;
        "
    >
        <h1 style="font-weight: 600; margin-bottom: 7px;">
            GPT Talking Portrait
        </h1>
    </div>
    <p style="margin-bottom: 10px; font-size: 94%; font-weight: 100; line-height: 1.5em;">
        Ask your question out loud; Whisper listens, and the live portrait responds!
    </p>
</div>
"""
css = '''
#col-container, #col-container-2 {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
div#record_btn > .mt-6 {
    margin-top: 0!important;
}
div#record_btn > .mt-6 button {
    width: 100%;
    height: 40px;
}
.footer {
    margin-bottom: 45px;
    margin-top: 10px;
    text-align: center;
    border-bottom: 1px solid #e5e5e5;
}
.footer>p {
    font-size: .8rem;
    display: inline-block;
    padding: 0 10px;
    transform: translateY(10px);
    background: white;
}
.dark .footer {
    border-color: #303030;
}
.dark .footer>p {
    background: #0b0f19;
}
'''
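
# Layout: the response video in one centered column, the microphone recorder
# and send button in a second column below it.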
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        gpt_response = gr.Video(label="Talking Portrait response")
    with gr.Column(elem_id="col-container-2"):
        record_input = gr.Audio(source="microphone", type="filepath", label="Audio input", show_label=True, elem_id="record_btn")
        send_btn = gr.Button("Send my request!")
        send_btn.click(infer, inputs=[record_input], outputs=[gpt_response])

demo.queue(max_size=32, concurrency_count=20).launch(debug=True)