import gradio as gr
import subprocess
from moviepy.editor import VideoFileClip
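# NOTE: `moviepy.editor` is the moviepy 1.x import path; moviepy 2.x removed it
# and exposes VideoFileClip from the top-level `moviepy` package instead.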

def convert_to_mp4_with_aac(input_path, output_path):
    # Re-encode to H.264 video / AAC audio so the result plays inline in browsers
    video = VideoFileClip(input_path)
    video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    # Close the clip to release the reader processes moviepy keeps open
    video.close()
    return output_path
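
# For reference, an equivalent ffmpeg CLI call (assuming ffmpeg is on PATH,
# which moviepy itself relies on) would be:
#   ffmpeg -y -i input.mp4 -c:v libx264 -c:a aac output.mp4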

def load_audio(audio_listed):
    # Map a dropdown selection to the bundled sample file under data/audio/
    return f"data/audio/{audio_listed}"

def execute_command(command: list[str]) -> None:
    # Passing an argument list (no shell=True) avoids shell-quoting issues;
    # check=True raises CalledProcessError if inference exits non-zero
    subprocess.run(command, check=True)

def infer(audio_input, image_path):
    # The style clip and pose are fixed for this demo; only the audio and
    # source image vary per request
    output_name = "acknowledgement_english@M030_front_neutral_level1_001@male_face"

    command = [
        "python",
        "inference_for_demo_video.py",
        f"--wav_path={audio_input}",
        "--style_clip_path=data/style_clip/3DMM/M030_front_neutral_level1_001.mat",
        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
        f"--image_path={image_path}",
        "--cfg_scale=1.0",
        "--max_gen_len=30",
        f"--output_name={output_name}",
    ]

    execute_command(command)

    # Re-encode the generated video to H.264/AAC so browsers can play it
    input_file = f"output_video/{output_name}.mp4"
    output_file = f"{output_name}.mp4"
    
    result = convert_to_mp4_with_aac(input_file, output_file)
    
    return result

with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""
        <h2 style="text-align: center;">DreamTalk</h2>
        """)
        with gr.Row():
            with gr.Column():
                image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
                gr.Examples(
                    examples=[
                        "data/src_img/uncropped/face3.png",
                        "data/src_img/uncropped/male_face.png",
                        "data/src_img/uncropped/uncut_src_img.jpg"
                    ],
                    inputs=[image_path]
                )
                audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"])
                audio_list = gr.Dropdown(
                    label="Choose an audio (optional)",
                    choices=[
                        "German1.wav", "German2.wav", "German3.wav", "German4.wav",
                        "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
                        "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
                        "french1.wav", "french2.wav", "french3.wav",
                        "italian1.wav", "italian2.wav", "italian3.wav",
                        "japan1.wav", "japan2.wav", "japan3.wav",
                        "korean1.wav", "korean2.wav", "korean3.wav",
                        "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
                        "spanish1.wav", "spanish2.wav", "spanish3.wav"
                    ]
                )
                audio_list.change(
                    fn=load_audio,
                    inputs=[audio_list],
                    outputs=[audio_input]
                )
                run_btn = gr.Button("Run")
            with gr.Column():
                output_video = gr.Video(format="mp4")
    
    run_btn.click(
        fn=infer,
        inputs=[audio_input, image_path],
        outputs=[output_video]
    )

demo.launch()
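# Optional: demo.launch(share=True) would additionally expose a temporary
# public Gradio URL for the demo.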