File size: 4,519 Bytes
f2d8fa0
b0c635c
475e087
346d904
701d40f
cf0d196
 
 
701d40f
f2d8fa0
60ace2e
c786385
f2d8fa0
 
cf0d196
346d904
cf0d196
f95538c
991d60f
9166220
b0c635c
 
cf0d196
 
f2d8fa0
283777a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7dec02f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283777a
 
 
 
 
 
 
 
cf0d196
c8bccce
 
 
cf0d196
 
283777a
c8bccce
cf0d196
 
 
 
 
 
 
 
283777a
 
f2d8fa0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
#import requests

from PIL import Image
import os

from share_btn import community_icon_html, loading_icon_html, share_js

# Hugging Face access token taken from the environment (None when unset).
token = os.getenv("HF_TOKEN")

# Remote Spaces loaded as callables; `infer` chains them in this order.
whisper_to_gpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
tts = gr.Interface.load(name="spaces/Flux9665/IMS-Toucan")
# This is the only Space that is loaded with the token.
talking_face = gr.Blocks.load(
    name="spaces/fffiloni/one-shot-talking-face",
    api_key=token,
)

def infer(audio):
    """Run the voice -> text -> speech -> talking-portrait pipeline.

    The recording is sent to the whisper-to-chatGPT Space; the second element
    of its response is turned into speech by the IMS-Toucan Space, and that
    speech is passed, together with the bundled ``wise_woman_portrait.png``
    image, to the one-shot-talking-face Space.

    Args:
        audio: Path of the recorded audio file, as delivered by the
            ``gr.Audio(type="filepath")`` input. Falsy when nothing was
            recorded.

    Returns:
        A 5-tuple in the order of the outputs wired to the send button: the
        first element of the whisper-to-chatGPT response (displayed in the
        translation textbox), the talking-face result (displayed in the video
        component), and three ``gr.update(visible=True)`` objects that reveal
        the share button and its two icons.

    Raises:
        gr.Error: If no audio was provided.
    """
    if not audio:
        # Nothing to transcribe: show a readable message in the UI instead of
        # forwarding an empty input to the remote Spaces.
        raise gr.Error("Please record an audio message before sending your request.")

    # NOTE(review): index 0 is presumably the transcription/translation and
    # index 1 the ChatGPT answer -- confirm against the remote Space.
    whisper_to_gpt_response = whisper_to_gpt(audio, "translate", fn_index=0)
    translated_text = whisper_to_gpt_response[0]
    answer_text = whisper_to_gpt_response[1]

    spoken_answer = tts(
        answer_text,
        "English Text",
        "English Accent",
        "English Speaker's Voice",
        fn_index=0,
    )

    portrait_video = talking_face("wise_woman_portrait.png", spoken_answer, fn_index=0)

    reveal = gr.update(visible=True)
    return translated_text, portrait_video, reveal, gr.update(visible=True), gr.update(visible=True)

# HTML header (app name and tagline) rendered at the top of the page through
# gr.HTML(title).
title = """
    <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
            margin-bottom: 10px;
        "
        >
        <h1 style="font-weight: 600; margin-bottom: 7px;">
            GPT Talking Portrait
        </h1>
        </div>
        <p style="margin-bottom: 10px;font-size: 94%;font-weight: 100;line-height: 1.5em;">
        Use Whisper to ask, alive portrait responds  !
        </p>
    </div>
"""

# Custom stylesheet handed to gr.Blocks(css=css). It targets the elem_id values
# used in the layout (col-container, col-container-2, record_btn,
# share-btn-container, share-btn) plus footer and spinner helper classes.
css = '''
    #col-container, #col-container-2 {max-width: 510px; margin-left: auto; margin-right: auto;}
    a {text-decoration-line: underline; font-weight: 600;}
    div#record_btn > .mt-6 {
        margin-top: 0!important;
    }
    div#record_btn > .mt-6 button {
        width: 100%;
        height: 40px;
    }
    .footer {
            margin-bottom: 45px;
            margin-top: 10px;
            text-align: center;
            border-bottom: 1px solid #e5e5e5;
        }
        .footer>p {
            font-size: .8rem;
            display: inline-block;
            padding: 0 10px;
            transform: translateY(10px);
            background: white;
        }
        .dark .footer {
            border-color: #303030;
        }
        .dark .footer>p {
            background: #0b0f19;
        }
    .animate-spin {
        animation: spin 1s linear infinite;
    }
    @keyframes spin {
        from {
            transform: rotate(0deg);
        }
        to {
            transform: rotate(360deg);
        }
    }
    #share-btn-container {
        display: flex; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem;
    }
    #share-btn {
        all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important;right:0;
    }
    #share-btn * {
        all: unset;
    }
    #share-btn-container div:nth-child(-n+2){
        width: auto !important;
        min-height: 0px !important;
    }
    #share-btn-container .wrap {
        display: none !important;
    }
'''

# Page layout: header and video output first, then the recorder and the
# transcript box, then the share widgets, which start hidden.
with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        gpt_response = gr.Video(
            label="Talking Portrait response",
            elem_id="video_out",
        )

    with gr.Column(elem_id="col-container-2"):
        record_input = gr.Audio(
            source="microphone",
            type="filepath",
            label="Audio input",
            show_label=True,
            elem_id="record_btn",
        )
        whisper_tr = gr.Textbox(
            label="whisper english translation",
            elem_id="text_inp",
        )
        send_btn = gr.Button("Send my request !")

    with gr.Group(elem_id="share-btn-container"):
        community_icon = gr.HTML(community_icon_html, visible=False)
        loading_icon = gr.HTML(loading_icon_html, visible=False)
        share_button = gr.Button(
            "Share to community",
            elem_id="share-btn",
            visible=False,
        )

    # The output list must stay in the same order as the tuple `infer` returns.
    send_btn.click(
        infer,
        inputs=[record_input],
        outputs=[
            whisper_tr,
            gpt_response,
            share_button,
            community_icon,
            loading_icon,
        ],
    )
    # Sharing is handled entirely by the JavaScript snippet; no Python callback.
    share_button.click(None, [], [], _js=share_js)

# Enable request queuing, then start the server.
demo.queue(max_size=32, concurrency_count=20)
demo.launch(debug=True)