File size: 2,863 Bytes
f2d8fa0
b0c635c
475e087
346d904
701d40f
 
f2d8fa0
60ace2e
c786385
f2d8fa0
9166220
 
f2d8fa0
5cd1297
346d904
60ace2e
f95538c
991d60f
9166220
475e087
b0c635c
 
475e087
f2d8fa0
283777a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
065c9ab
c8bccce
 
 
065c9ab
283777a
c8bccce
 
283777a
 
 
f2d8fa0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Entry point for the "GPT Talking Portrait" Gradio Space.
# Pipeline: microphone audio -> ChatGPT reply (via Whisper Space) -> TTS -> talking-face video.
import gradio as gr
#import requests

from PIL import Image
import os
# Hugging Face access token, read from the Space's secrets; used to call the
# private/gated one-shot-talking-face Space below.
token = os.environ.get('HF_TOKEN')
# Remote Space clients, loaded once at startup (each load performs a network
# request to the corresponding Hugging Face Space).
whisper_to_gpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
tts = gr.Interface.load(name="spaces/Flux9665/IMS-Toucan")
talking_face = gr.Blocks.load(name="spaces/fffiloni/one-shot-talking-face", api_key=token)



def infer(audio):
    """Turn a recorded question into a talking-portrait video reply.

    Chains three remote Spaces: Whisper+ChatGPT transcribes the audio and
    produces a text answer, IMS-Toucan synthesizes that answer as English
    speech, and one-shot-talking-face animates a fixed portrait image with
    the synthesized audio.

    Parameters
    ----------
    audio : str
        Filepath of the recorded microphone input.

    Returns
    -------
    str
        Path/link to the generated talking-portrait video.
    """
    # Index [1] of the Whisper-to-ChatGPT result holds the text reply.
    chat_result = whisper_to_gpt(audio, "translate", fn_index=0)
    spoken_reply = tts(chat_result[1], "English Text", "English Accent", "English Speaker's Voice", fn_index=0)
    # Animate the bundled portrait with the synthesized speech.
    video_link = talking_face("wise_woman_portrait.png", spoken_reply, fn_index=0)
    return video_link

# HTML banner rendered at the top of the page (injected via gr.HTML below).
title = """
    <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
            margin-bottom: 10px;
        "
        >
        <h1 style="font-weight: 600; margin-bottom: 7px;">
            GPT Talking Portrait
        </h1>
        </div>
        <p style="margin-bottom: 10px;font-size: 94%;font-weight: 100;line-height: 1.5em;">
        Use Whisper to ask, alive portrait responds  !
        </p>
    </div>
"""

# Custom CSS passed to gr.Blocks: centers the two columns, styles the record
# button container, and themes the footer for light and dark modes.
css = '''
    #col-container, #col-container-2 {max-width: 510px; margin-left: auto; margin-right: auto;}
    a {text-decoration-line: underline; font-weight: 600;}
    div#record_btn > .mt-6 {
        margin-top: 0!important;
    }
    div#record_btn > .mt-6 button {
        width: 100%;
        height: 40px;
    }
    .footer {
            margin-bottom: 45px;
            margin-top: 10px;
            text-align: center;
            border-bottom: 1px solid #e5e5e5;
        }
        .footer>p {
            font-size: .8rem;
            display: inline-block;
            padding: 0 10px;
            transform: translateY(10px);
            background: white;
        }
        .dark .footer {
            border-color: #303030;
        }
        .dark .footer>p {
            background: #0b0f19;
        }
'''

# UI layout: an output video column on top, then the microphone input and
# submit button; clicking the button runs the full infer() pipeline.
with gr.Blocks(css=css) as demo:
    
    with gr.Column(elem_id="col-container"):
        
        gr.HTML(title)
        
        # Output: the generated talking-portrait video.
        gpt_response = gr.Video(label="Talking Portrait response")
             
    with gr.Column(elem_id="col-container-2"):
          
        # Input: microphone recording, passed to infer() as a filepath.
        record_input = gr.Audio(source="microphone",type="filepath", label="Audio input", show_label=True,elem_id="record_btn")

        send_btn = gr.Button("Send my request !")
   
    send_btn.click(infer, inputs=[record_input], outputs=[gpt_response])

# Queue requests (long-running remote calls) and start the server.
demo.queue(max_size=32, concurrency_count=20).launch(debug=True)