ChaolongYang committed · verified
Commit af9e4bd · Parent: 4841a80

Update app.py

Files changed (1): app.py (+37 -16)
app.py CHANGED
@@ -32,6 +32,14 @@ def set_tts():
 def create_kd_talker():
     return Inferencer()
 
+example_folder = "example"
+example_choices = ["Example 1", "Example 2", "Example 3"]
+example_mapping = {
+    "Example 1": {"audio": os.path.join(example_folder, "example1.wav"), "image": os.path.join(example_folder, "example1.png")},
+    "Example 2": {"audio": os.path.join(example_folder, "example2.wav"), "image": os.path.join(example_folder, "example2.png")},
+    "Example 3": {"audio": os.path.join(example_folder, "example3.wav"), "image": os.path.join(example_folder, "example3.png")},
+}
+
 @spaces.GPU
 def predict(prompt, upload_reference_audio, microphone_reference_audio, reference_audio_type):
     global result_dir
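This hunk adds a module-level lookup table mapping a dropdown label to a pair of bundled assets. A minimal sketch of that lookup, assuming the Space ships an example/ folder containing the three wav/png pairs named above:

import os

example_folder = "example"
example_mapping = {
    f"Example {i}": {
        "audio": os.path.join(example_folder, f"example{i}.wav"),
        "image": os.path.join(example_folder, f"example{i}.png"),
    }
    for i in (1, 2, 3)
}

# Missing keys fall back to empty paths, mirroring the
# .get(choice, {}) pattern used later in the commit.
choice = "Example 2"
example = example_mapping.get(choice, {})
print(example.get("audio", ""), example.get("image", ""))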
@@ -60,28 +68,26 @@ def main():
         device = "cuda"
     else:
         device = "cpu"
-
     with gr.Blocks(analytics_enabled=False) as interface:
-        gr.HTML(
-            """
-            <div align='center'>
-            <h2> Unlock Pose Diversity: Accurate and Efficient Implicit Keypoint-based Spatiotemporal Diffusion for Audio-driven Talking Portrait </h2>
-            <div style="display: flex; justify-content: center; align-items: center; gap: 20px;">
-            <img src='https://newstatic.dukekunshan.edu.cn/mainsite/2021/08/07161629/large_dku-Logo-e1649298929570.png' alt='Logo' width='150'/>
-            <img src='https://www.xjtlu.edu.cn/wp-content/uploads/2023/12/7c52fd62e9cf26cb493faa7f91c2782.png' width='250'/>
+        with gr.Row():
+            gr.HTML(
+                """
+                <div align='center'>
+                <h2> Unlock Pose Diversity: Accurate and Efficient Implicit Keypoint-based Spatiotemporal Diffusion for Audio-driven Talking Portrait </h2>
+                <div style="display: flex; justify-content: center; align-items: center; gap: 20px;">
+                <img src='https://newstatic.dukekunshan.edu.cn/mainsite/2021/08/07161629/large_dku-Logo-e1649298929570.png' alt='Logo' width='150'/>
+                <img src='https://www.xjtlu.edu.cn/wp-content/uploads/2023/12/7c52fd62e9cf26cb493faa7f91c2782.png' width='250'/>
+                </div>
             </div>
-            </div>
-            """
-        )
-        driven_audio_type = gr.Textbox(value="upload", visible=False)
-        reference_audio_type = gr.Textbox(value="upload", visible=False)
-
+                """
+            )
+            driven_audio_type = gr.Textbox(value="upload", visible=False)
+            reference_audio_type = gr.Textbox(value="upload", visible=False)
         with gr.Row():
             with gr.Column(variant="panel"):
                 with gr.Tabs(elem_id="kdtalker_source_image"):
                     with gr.TabItem("Upload image"):
                         source_image = gr.Image(label="Source image", sources="upload", type="filepath", scale=256)
-
            with gr.Tabs(elem_id="kdtalker_driven_audio"):
                with gr.TabItem("Upload"):
                    upload_driven_audio = gr.Audio(label="Upload audio", sources="upload", type="filepath")
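The two hidden gr.Textbox components act as lightweight state holders for the audio-source mode ("upload" vs. microphone); downstream callbacks such as predict receive their values as plain string inputs. A self-contained sketch of that pattern (component names here are illustrative, not from app.py):

import gradio as gr

with gr.Blocks() as demo:
    # Hidden textbox carrying which audio source is active; any callback
    # that lists it in `inputs` receives its value as an ordinary string.
    audio_type = gr.Textbox(value="upload", visible=False)
    show = gr.Button("Show current mode")
    out = gr.Textbox(label="Mode")
    show.click(fn=lambda mode: f"audio source: {mode}", inputs=audio_type, outputs=out)

demo.launch()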
@@ -117,8 +123,23 @@ def main():
             ],
             outputs=[gen_video]
         )
+        with gr.TabItem("Example"):
+            example_choice = gr.Dropdown(choices=example_choices, label="Choose an example")
+            def load_example(choice):
+                example = example_mapping.get(choice, {})
+                audio_path = example.get("audio", "")
+                image_path = example.get("image", "")
+                return [audio_path, image_path]
+            example_choice.change(
+                fn=load_example,
+                inputs=[example_choice],
+                outputs=[upload_driven_audio, source_image]
+            )
+            example_choice.change(set_upload, outputs=driven_audio_type)
+
+
     return interface


 demo = main()
-demo.queue().launch()
+demo.queue().launch(share=True)
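The final hunk attaches two .change listeners to the same dropdown event: one fills the audio and image inputs from example_mapping, the other resets driven_audio_type via set_upload (defined elsewhere in app.py; presumably it returns "upload"). A stripped-down, runnable sketch of the same wiring, with a stand-in set_upload and a single hypothetical example entry:

import os
import gradio as gr

example_mapping = {
    "Example 1": {"audio": os.path.join("example", "example1.wav"),
                  "image": os.path.join("example", "example1.png")},
}

def load_example(choice):
    example = example_mapping.get(choice, {})
    return [example.get("audio", ""), example.get("image", "")]

def set_upload():  # stand-in for app.py's set_upload
    return "upload"

with gr.Blocks() as demo:
    driven_audio_type = gr.Textbox(value="upload", visible=False)
    source_image = gr.Image(type="filepath")
    upload_driven_audio = gr.Audio(type="filepath")
    example_choice = gr.Dropdown(choices=list(example_mapping), label="Choose an example")
    # Two listeners on one event, as in the commit: the first loads the
    # example assets, the second resets the audio-source flag.
    example_choice.change(load_example, inputs=example_choice,
                          outputs=[upload_driven_audio, source_image])
    example_choice.change(set_upload, outputs=driven_audio_type)

demo.launch()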
 