import gradio as gr
import subprocess
from moviepy.editor import VideoFileClip
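# NOTE: `moviepy.editor` is the moviepy 1.x import path; moviepy 2.x removed it
# and exposes VideoFileClip from the top-level `moviepy` package instead.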

def convert_to_mp4_with_aac(input_path, output_path):
    # Re-encode to H.264 video / AAC audio so the result plays inline in browsers
    video = VideoFileClip(input_path)
    video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    # Close the clip to release the reader processes moviepy keeps open
    video.close()
    return output_path
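
# For reference, an equivalent ffmpeg CLI call (assuming ffmpeg is on PATH,
# which moviepy itself relies on) would be:
#   ffmpeg -y -i input.mp4 -c:v libx264 -c:a aac output.mp4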

def load_audio(audio_listed):
    # Map a dropdown selection to the bundled sample file under data/audio/
    return f"data/audio/{audio_listed}"

def execute_command(command: list[str]) -> None:
    # Passing an argument list (no shell=True) avoids shell-quoting issues;
    # check=True raises CalledProcessError if inference exits non-zero
    subprocess.run(command, check=True)

def infer(audio_input, image_path):
    # The style clip and pose are fixed for this demo; only the audio and
    # source image vary per request
    output_name = "acknowledgement_english@M030_front_neutral_level1_001@male_face"

    command = [
        "python",
        "inference_for_demo_video.py",
        f"--wav_path={audio_input}",
        "--style_clip_path=data/style_clip/3DMM/M030_front_neutral_level1_001.mat",
        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
        f"--image_path={image_path}",
        "--cfg_scale=1.0",
        "--max_gen_len=30",
        f"--output_name={output_name}",
    ]

    execute_command(command)

    # Re-encode the generated video to H.264/AAC so browsers can play it
    input_file = f"output_video/{output_name}.mp4"
    output_file = f"{output_name}.mp4"
    
    result = convert_to_mp4_with_aac(input_file, output_file)
    
    return result

with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""
        <h2 style="text-align: center;">DreamTalk</h2>
        """)
        with gr.Row():
            with gr.Column():
                image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
                gr.Examples(
                    examples=[
                        "data/src_img/uncropped/face3.png",
                        "data/src_img/uncropped/male_face.png",
                        "data/src_img/uncropped/uncut_src_img.jpg"
                    ],
                    inputs=[image_path]
                )
                audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"])
                audio_list = gr.Dropdown(
                    label="Choose an audio (optional)",
                    choices=[
                        "German1.wav", "German2.wav", "German3.wav", "German4.wav",
                        "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
                        "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
                        "french1.wav", "french2.wav", "french3.wav",
                        "italian1.wav", "italian2.wav", "italian3.wav",
                        "japan1.wav", "japan2.wav", "japan3.wav",
                        "korean1.wav", "korean2.wav", "korean3.wav",
                        "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
                        "spanish1.wav", "spanish2.wav", "spanish3.wav"
                    ]
                )
                audio_list.change(
                    fn=load_audio,
                    inputs=[audio_list],
                    outputs=[audio_input]
                )
                run_btn = gr.Button("Run")
            with gr.Column():
                output_video = gr.Video(format="mp4")
    
    run_btn.click(
        fn=infer,
        inputs=[audio_input, image_path],
        outputs=[output_video]
    )

demo.launch()
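# Optional: demo.launch(share=True) would additionally expose a temporary
# public Gradio URL for the demo.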