ChaolongYang committed · verified
Commit af9e4bd · Parent: 4841a80

Update app.py

Files changed (1): app.py (+37 -16)
app.py CHANGED
@@ -32,6 +32,14 @@ def set_tts():
 def create_kd_talker():
     return Inferencer()
 
+example_folder = "example"
+example_choices = ["Example 1", "Example 2", "Example 3"]
+example_mapping = {
+    "Example 1": {"audio": os.path.join(example_folder, "example1.wav"), "image": os.path.join(example_folder, "example1.png")},
+    "Example 2": {"audio": os.path.join(example_folder, "example2.wav"), "image": os.path.join(example_folder, "example2.png")},
+    "Example 3": {"audio": os.path.join(example_folder, "example3.wav"), "image": os.path.join(example_folder, "example3.png")},
+}
+
 @spaces.GPU
 def predict(prompt, upload_reference_audio, microphone_reference_audio, reference_audio_type):
     global result_dir
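This hunk adds a module-level lookup table mapping a dropdown label to a pair of bundled assets. A minimal sketch of that lookup, assuming the Space ships an example/ folder containing the three wav/png pairs named above:

import os

example_folder = "example"
example_mapping = {
    f"Example {i}": {
        "audio": os.path.join(example_folder, f"example{i}.wav"),
        "image": os.path.join(example_folder, f"example{i}.png"),
    }
    for i in (1, 2, 3)
}

# Missing keys fall back to empty paths, mirroring the
# .get(choice, {}) pattern used later in the commit.
choice = "Example 2"
example = example_mapping.get(choice, {})
print(example.get("audio", ""), example.get("image", ""))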
@@ -60,28 +68,26 @@ def main():
         device = "cuda"
     else:
         device = "cpu"
-
     with gr.Blocks(analytics_enabled=False) as interface:
-        gr.HTML(
-            """
-            <div align='center'>
-            <h2> Unlock Pose Diversity: Accurate and Efficient Implicit Keypoint-based Spatiotemporal Diffusion for Audio-driven Talking Portrait </h2>
-            <div style="display: flex; justify-content: center; align-items: center; gap: 20px;">
-            <img src='https://newstatic.dukekunshan.edu.cn/mainsite/2021/08/07161629/large_dku-Logo-e1649298929570.png' alt='Logo' width='150'/>
-            <img src='https://www.xjtlu.edu.cn/wp-content/uploads/2023/12/7c52fd62e9cf26cb493faa7f91c2782.png' width='250'/>
+        with gr.Row():
+            gr.HTML(
+                """
+                <div align='center'>
+                <h2> Unlock Pose Diversity: Accurate and Efficient Implicit Keypoint-based Spatiotemporal Diffusion for Audio-driven Talking Portrait </h2>
+                <div style="display: flex; justify-content: center; align-items: center; gap: 20px;">
+                <img src='https://newstatic.dukekunshan.edu.cn/mainsite/2021/08/07161629/large_dku-Logo-e1649298929570.png' alt='Logo' width='150'/>
+                <img src='https://www.xjtlu.edu.cn/wp-content/uploads/2023/12/7c52fd62e9cf26cb493faa7f91c2782.png' width='250'/>
+                </div>
             </div>
-            </div>
-            """
-        )
-        driven_audio_type = gr.Textbox(value="upload", visible=False)
-        reference_audio_type = gr.Textbox(value="upload", visible=False)
-
+                """
+            )
+            driven_audio_type = gr.Textbox(value="upload", visible=False)
+            reference_audio_type = gr.Textbox(value="upload", visible=False)
         with gr.Row():
             with gr.Column(variant="panel"):
                 with gr.Tabs(elem_id="kdtalker_source_image"):
                     with gr.TabItem("Upload image"):
                         source_image = gr.Image(label="Source image", sources="upload", type="filepath", scale=256)
-
            with gr.Tabs(elem_id="kdtalker_driven_audio"):
                with gr.TabItem("Upload"):
                    upload_driven_audio = gr.Audio(label="Upload audio", sources="upload", type="filepath")
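The two hidden gr.Textbox components act as lightweight state holders for the audio-source mode ("upload" vs. microphone); downstream callbacks such as predict receive their values as plain string inputs. A self-contained sketch of that pattern (component names here are illustrative, not from app.py):

import gradio as gr

with gr.Blocks() as demo:
    # Hidden textbox carrying which audio source is active; any callback
    # that lists it in `inputs` receives its value as an ordinary string.
    audio_type = gr.Textbox(value="upload", visible=False)
    show = gr.Button("Show current mode")
    out = gr.Textbox(label="Mode")
    show.click(fn=lambda mode: f"audio source: {mode}", inputs=audio_type, outputs=out)

demo.launch()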
@@ -117,8 +123,23 @@ def main():
             ],
             outputs=[gen_video]
         )
+        with gr.TabItem("Example"):
+            example_choice = gr.Dropdown(choices=example_choices, label="Choose an example")
+            def load_example(choice):
+                example = example_mapping.get(choice, {})
+                audio_path = example.get("audio", "")
+                image_path = example.get("image", "")
+                return [audio_path, image_path]
+            example_choice.change(
+                fn=load_example,
+                inputs=[example_choice],
+                outputs=[upload_driven_audio, source_image]
+            )
+            example_choice.change(set_upload, outputs=driven_audio_type)
+
+
     return interface


 demo = main()
-demo.queue().launch()
+demo.queue().launch(share=True)
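The final hunk attaches two .change listeners to the same dropdown event: one fills the audio and image inputs from example_mapping, the other resets driven_audio_type via set_upload (defined elsewhere in app.py; presumably it returns "upload"). A stripped-down, runnable sketch of the same wiring, with a stand-in set_upload and a single hypothetical example entry:

import os
import gradio as gr

example_mapping = {
    "Example 1": {"audio": os.path.join("example", "example1.wav"),
                  "image": os.path.join("example", "example1.png")},
}

def load_example(choice):
    example = example_mapping.get(choice, {})
    return [example.get("audio", ""), example.get("image", "")]

def set_upload():  # stand-in for app.py's set_upload
    return "upload"

with gr.Blocks() as demo:
    driven_audio_type = gr.Textbox(value="upload", visible=False)
    source_image = gr.Image(type="filepath")
    upload_driven_audio = gr.Audio(type="filepath")
    example_choice = gr.Dropdown(choices=list(example_mapping), label="Choose an example")
    # Two listeners on one event, as in the commit: the first loads the
    # example assets, the second resets the audio-source flag.
    example_choice.change(load_example, inputs=example_choice,
                          outputs=[upload_driven_audio, source_image])
    example_choice.change(set_upload, outputs=driven_audio_type)

demo.launch()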
 