Spaces:
				
			
			
	
			
			
		Build error
		
	
	
	
			
			
	
	
	
	
		
		
		Build error
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -482,32 +482,9 @@ character_name_to_yaml = { 
     | 
|
| 482 | 
         
             
              "101099-00_18_09-00_18_19.mp4": "./datasets/data_json/show_oliver_test/Stupid_Watergate_-_Last_Week_Tonight_with_John_Oliver_HBO-FVFdsl29s_Q.mkv.json",
         
     | 
| 483 | 
         
             
            }
         
     | 
| 484 | 
         | 
| 485 | 
         
            -
             
     | 
| 486 | 
         
            -
             
     | 
| 487 | 
         
            -
             
     | 
| 488 | 
         
            -
                    "./emage/smplx_models/", 
         
     | 
| 489 | 
         
            -
                    model_type='smplx',
         
     | 
| 490 | 
         
            -
                    gender='NEUTRAL_2020', 
         
     | 
| 491 | 
         
            -
                    use_face_contour=False,
         
     | 
| 492 | 
         
            -
                    num_betas=300,
         
     | 
| 493 | 
         
            -
                    num_expression_coeffs=100, 
         
     | 
| 494 | 
         
            -
                    ext='npz',
         
     | 
| 495 | 
         
            -
                    use_pca=False,
         
     | 
| 496 | 
         
            -
                )
         
     | 
| 497 | 
         
            -
            model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
         
     | 
| 498 | 
         
            -
            for param in model.parameters():
         
     | 
| 499 | 
         
            -
                param.requires_grad = False
         
     | 
| 500 | 
         
            -
            model.smplx_model = smplx_model
         
     | 
| 501 | 
         
            -
            model.get_motion_reps = get_motion_reps_tensor
         
     | 
| 502 | 
         
            -
             
     | 
| 503 | 
         
            -
            checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
         
     | 
| 504 | 
         
            -
            checkpoint = torch.load(checkpoint_path)
         
     | 
| 505 | 
         
            -
            state_dict = checkpoint['model_state_dict']
         
     | 
| 506 | 
         
            -
            # new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
         
     | 
| 507 | 
         
            -
            model.load_state_dict(state_dict, strict=False)
         
     | 
| 508 | 
         
            -
             
     | 
| 509 | 
         
            -
            @spaces.GPU(duration=299) 
         
     | 
| 510 | 
         
            -
            def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
         
     | 
| 511 | 
         
             
                cfg.seed = seed
         
     | 
| 512 | 
         
             
                seed_everything(cfg.seed)
         
     | 
| 513 | 
         
             
                experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
         
     | 
| 
         @@ -542,13 +519,35 @@ def tango(audio_path, character_name, seed, create_graph=False, video_folder_pat 
     | 
|
| 542 | 
         
             
                    os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}") 
         
     | 
| 543 | 
         
             
                    cfg.data.test_meta_paths = json_save_path
         
     | 
| 544 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 545 | 
         
             
                local_rank = 0  
         
     | 
| 546 | 
         
             
                torch.cuda.set_device(local_rank)
         
     | 
| 547 | 
         
             
                device = torch.device("cuda", local_rank)
         
     | 
| 548 | 
         
            -
             
     | 
| 549 | 
         
             
                smplx_model = smplx_model.to(device).eval()
         
     | 
| 550 | 
         
             
                model = model.to(device)
         
     | 
| 551 | 
         
             
                model.smplx_model = model.smplx_model.to(device)
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 552 | 
         | 
| 553 | 
         
             
                test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
         
     | 
| 554 | 
         
             
                os.makedirs(test_path, exist_ok=True)
         
     | 
| 
         @@ -572,7 +571,11 @@ examples_video = [ 
     | 
|
| 572 | 
         
             
            ]
         
     | 
| 573 | 
         | 
| 574 | 
         
             
            combined_examples = [
         
     | 
| 575 | 
         
            -
                [ 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 576 | 
         
             
            ]
         
     | 
| 577 | 
         | 
| 578 | 
         
             
            def make_demo():
         
     | 
| 
         @@ -594,31 +597,39 @@ def make_demo(): 
     | 
|
| 594 | 
         
             
                                <a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
         
     | 
| 595 | 
         
             
                                    <a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
         
     | 
| 596 | 
         
             
                                    <a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
         
     | 
| 
         | 
|
| 
         | 
|
| 597 | 
         
             
                        """
         
     | 
| 598 | 
         
             
                    )
         
     | 
| 599 | 
         | 
| 600 | 
         
            -
                    gr.Markdown("""
         
     | 
| 601 | 
         
            -
                    <h4 style="text-align: left;">
         
     | 
| 602 | 
         
            -
                    This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
         
     | 
| 603 | 
         | 
| 604 | 
         
            -
                    Details of the low-quality mode:
         
     | 
| 605 | 
         
            -
                    1. Lower resolution.
         
     | 
| 606 | 
         
            -
                    2. More discontinuous frames (causing noticeable "frame jumps").
         
     | 
| 607 | 
         
            -
                    3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
         
     | 
| 608 | 
         
            -
                    4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
         
     | 
| 609 | 
         
            -
                    5. You can provide a custom background video for your character, but it is limited to 20 seconds.
         
     | 
| 610 | 
         | 
| 611 | 
         
            -
                    Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
         
     | 
| 612 | 
         
            -
                    </h4>
         
     | 
| 613 | 
         
            -
                    """)
         
     | 
| 614 | 
         | 
| 615 | 
         
             
                    # Create a gallery with 5 videos
         
     | 
| 616 | 
         
             
                    with gr.Row():
         
     | 
| 617 | 
         
            -
                        video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo  
     | 
| 618 | 
         
            -
                        video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo  
     | 
| 619 | 
         
            -
                        video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo  
     | 
| 620 | 
         
            -
                        video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo  
     | 
| 621 | 
         
            -
                        video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo  
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 622 | 
         | 
| 623 | 
         | 
| 624 | 
         
             
                    with gr.Row():
         
     | 
| 
         @@ -635,12 +646,31 @@ def make_demo(): 
     | 
|
| 635 | 
         
             
                                        loop=False,
         
     | 
| 636 | 
         
             
                                        show_share_button=True)
         
     | 
| 637 | 
         
             
                        with gr.Column(scale=1):
         
     | 
| 638 | 
         
            -
                            file_output_1 = gr.File(label="Download Motion and Visualize in Blender")
         
     | 
| 639 | 
         
            -
                            file_output_2 = gr.File(label="Download Motion and Visualize in Blender")
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 640 | 
         | 
| 641 | 
         
             
                    with gr.Row():
         
     | 
| 642 | 
         
             
                        with gr.Column(scale=1):
         
     | 
| 643 | 
         
             
                            audio_input = gr.Audio(label="Upload your audio")
         
     | 
| 
         | 
|
| 644 | 
         
             
                        with gr.Column(scale=2):
         
     | 
| 645 | 
         
             
                            gr.Examples(
         
     | 
| 646 | 
         
             
                                examples=examples_audio,
         
     | 
| 
         @@ -659,9 +689,7 @@ def make_demo(): 
     | 
|
| 659 | 
         
             
                                label="Character Examples",
         
     | 
| 660 | 
         
             
                                cache_examples=False
         
     | 
| 661 | 
         
             
                            )
         
     | 
| 662 | 
         
            -
                     
     | 
| 663 | 
         
            -
                        seed_input = gr.Number(label="Seed", value=2024, interactive=True)
         
     | 
| 664 | 
         
            -
             
     | 
| 665 | 
         
             
                    # Fourth row: Generate video button
         
     | 
| 666 | 
         
             
                    with gr.Row():
         
     | 
| 667 | 
         
             
                        run_button = gr.Button("Generate Video")
         
     | 
| 
         | 
|
| 482 | 
         
             
              "101099-00_18_09-00_18_19.mp4": "./datasets/data_json/show_oliver_test/Stupid_Watergate_-_Last_Week_Tonight_with_John_Oliver_HBO-FVFdsl29s_Q.mkv.json",
         
     | 
| 483 | 
         
             
            }
         
     | 
| 484 | 
         | 
| 485 | 
         
            +
            @spaces.GPU(duration=240) 
         
     | 
| 486 | 
         
            +
            def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
         
     | 
| 487 | 
         
            +
                cfg = prepare_all("./configs/gradio.yaml")
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 488 | 
         
             
                cfg.seed = seed
         
     | 
| 489 | 
         
             
                seed_everything(cfg.seed)
         
     | 
| 490 | 
         
             
                experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
         
     | 
| 
         | 
|
| 519 | 
         
             
                    os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}") 
         
     | 
| 520 | 
         
             
                    cfg.data.test_meta_paths = json_save_path
         
     | 
| 521 | 
         | 
| 522 | 
         
            +
                smplx_model = smplx.create(
         
     | 
| 523 | 
         
            +
                    "./emage/smplx_models/", 
         
     | 
| 524 | 
         
            +
                    model_type='smplx',
         
     | 
| 525 | 
         
            +
                    gender='NEUTRAL_2020', 
         
     | 
| 526 | 
         
            +
                    use_face_contour=False,
         
     | 
| 527 | 
         
            +
                    num_betas=300,
         
     | 
| 528 | 
         
            +
                    num_expression_coeffs=100, 
         
     | 
| 529 | 
         
            +
                    ext='npz',
         
     | 
| 530 | 
         
            +
                    use_pca=False,
         
     | 
| 531 | 
         
            +
                )
         
     | 
| 532 | 
         
            +
                model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
         
     | 
| 533 | 
         
            +
                for param in model.parameters():
         
     | 
| 534 | 
         
            +
                    param.requires_grad = False
         
     | 
| 535 | 
         
            +
                model.smplx_model = smplx_model
         
     | 
| 536 | 
         
            +
                model.get_motion_reps = get_motion_reps_tensor
         
     | 
| 537 | 
         
            +
                
         
     | 
| 538 | 
         
             
                local_rank = 0  
         
     | 
| 539 | 
         
             
                torch.cuda.set_device(local_rank)
         
     | 
| 540 | 
         
             
                device = torch.device("cuda", local_rank)
         
     | 
| 541 | 
         
            +
             
     | 
| 542 | 
         
             
                smplx_model = smplx_model.to(device).eval()
         
     | 
| 543 | 
         
             
                model = model.to(device)
         
     | 
| 544 | 
         
             
                model.smplx_model = model.smplx_model.to(device)
         
     | 
| 545 | 
         
            +
             
     | 
| 546 | 
         
            +
                checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
         
     | 
| 547 | 
         
            +
                checkpoint = torch.load(checkpoint_path)
         
     | 
| 548 | 
         
            +
                state_dict = checkpoint['model_state_dict']
         
     | 
| 549 | 
         
            +
                new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
         
     | 
| 550 | 
         
            +
                model.load_state_dict(new_state_dict, strict=False)
         
     | 
| 551 | 
         | 
| 552 | 
         
             
                test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
         
     | 
| 553 | 
         
             
                os.makedirs(test_path, exist_ok=True)
         
     | 
| 
         | 
|
| 571 | 
         
             
            ]
         
     | 
| 572 | 
         | 
| 573 | 
         
             
            combined_examples = [
         
     | 
| 574 | 
         
            +
                ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker9_o7Ik1OB4TaE_00-00-38.15_00-00-42.33.mp4", 2024],
         
     | 
| 575 | 
         
            +
                ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker7_iuYlGRnC7J8_00-00-0.00_00-00-3.25.mp4", 2024],
         
     | 
| 576 | 
         
            +
                ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
         
     | 
| 577 | 
         
            +
                ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/1wrQ6Msp7wM_00-00-39.69_00-00-45.68.mp4", 2024],
         
     | 
| 578 | 
         
            +
                ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/speaker8_jjRWaMCWs44_00-00-30.16_00-00-33.32.mp4", 2024],
         
     | 
| 579 | 
         
             
            ]
         
     | 
| 580 | 
         | 
| 581 | 
         
             
            def make_demo():
         
     | 
| 
         | 
|
| 597 | 
         
             
                                <a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
         
     | 
| 598 | 
         
             
                                    <a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
         
     | 
| 599 | 
         
             
                                    <a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
         
     | 
| 600 | 
         
            +
                                </h2> \
         
     | 
| 601 | 
         
            +
                                <a style='font-size:18px;color: #000000'>This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some high-quality mode results are shown below. </a> </div>
         
     | 
| 602 | 
         
             
                        """
         
     | 
| 603 | 
         
             
                    )
         
     | 
| 604 | 
         | 
| 605 | 
         
            +
                    # gr.Markdown("""
         
     | 
| 606 | 
         
            +
                    # <h4 style="text-align: left;">
         
     | 
| 607 | 
         
            +
                    # This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
         
     | 
| 608 | 
         | 
| 609 | 
         
            +
                    # Details of the low-quality mode:
         
     | 
| 610 | 
         
            +
                    # 1. Lower resolution.
         
     | 
| 611 | 
         
            +
                    # 2. More discontinuous frames (causing noticeable "frame jumps").
         
     | 
| 612 | 
         
            +
                    # 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
         
     | 
| 613 | 
         
            +
                    # 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
         
     | 
| 614 | 
         
            +
                    # 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
         
     | 
| 615 | 
         | 
| 616 | 
         
            +
                    # Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
         
     | 
| 617 | 
         
            +
                    # </h4>
         
     | 
| 618 | 
         
            +
                    # """)
         
     | 
| 619 | 
         | 
| 620 | 
         
             
                    # Create a gallery with 5 videos
         
     | 
| 621 | 
         
             
                    with gr.Row():
         
     | 
| 622 | 
         
            +
                        video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
         
     | 
| 623 | 
         
            +
                        video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
         
     | 
| 624 | 
         
            +
                        video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
         
     | 
| 625 | 
         
            +
                        video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
         
     | 
| 626 | 
         
            +
                        video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
         
     | 
| 627 | 
         
            +
                    with gr.Row():
         
     | 
| 628 | 
         
            +
                        video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
         
     | 
| 629 | 
         
            +
                        video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
         
     | 
| 630 | 
         
            +
                        video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
         
     | 
| 631 | 
         
            +
                        video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
         
     | 
| 632 | 
         
            +
                        video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
         
     | 
| 633 | 
         | 
| 634 | 
         | 
| 635 | 
         
             
                    with gr.Row():
         
     | 
| 
         | 
|
| 646 | 
         
             
                                        loop=False,
         
     | 
| 647 | 
         
             
                                        show_share_button=True)
         
     | 
| 648 | 
         
             
                        with gr.Column(scale=1):
         
     | 
| 649 | 
         
            +
                            file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
         
     | 
| 650 | 
         
            +
                            file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
         
     | 
| 651 | 
         
            +
                            gr.Markdown("""
         
     | 
| 652 | 
         
            +
                            <h4 style="text-align: left;">
         
     | 
| 653 | 
         
            +
                            <a style='font-size:18px;color: #000000'> Details of the low-quality mode: </a>
         
     | 
| 654 | 
         
            +
                            <br>
         
     | 
| 655 | 
         
            +
                            <a style='font-size:18px;color: #000000'> 1. Lower resolution.</a>
         
     | 
| 656 | 
         
            +
                            <br>
         
     | 
| 657 | 
         
            +
                            <a style='font-size:18px;color: #000000'> 2. More discontinuous graph nodes (causing noticeable "frame jumps"). </a>
         
     | 
| 658 | 
         
            +
                            <br>
         
     | 
| 659 | 
         
            +
                            <a style='font-size:18px;color: #000000'> 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing. </a>
         
     | 
| 660 | 
         
            +
                            <br>
         
     | 
| 661 | 
         
            +
                            <a style='font-size:18px;color: #000000'> 4. only use first 8 seconds of your input audio.</a>
         
     | 
| 662 | 
         
            +
                            <br>
         
     | 
| 663 | 
         
            +
                            <a style='font-size:18px;color: #000000'> 5. custom character for a video up to 10 seconds. </a>
         
     | 
| 664 | 
         
            +
                            <br>
         
     | 
| 665 | 
         
            +
                            <br>
         
     | 
| 666 | 
         
            +
                            <a style='font-size:18px;color: #000000'> Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.</a>
         
     | 
| 667 | 
         
            +
                            </h4>
         
     | 
| 668 | 
         
            +
                            """)
         
     | 
| 669 | 
         | 
| 670 | 
         
             
                    with gr.Row():
         
     | 
| 671 | 
         
             
                        with gr.Column(scale=1):
         
     | 
| 672 | 
         
             
                            audio_input = gr.Audio(label="Upload your audio")
         
     | 
| 673 | 
         
            +
                            seed_input = gr.Number(label="Seed", value=2024, interactive=True)
         
     | 
| 674 | 
         
             
                        with gr.Column(scale=2):
         
     | 
| 675 | 
         
             
                            gr.Examples(
         
     | 
| 676 | 
         
             
                                examples=examples_audio,
         
     | 
| 
         | 
|
| 689 | 
         
             
                                label="Character Examples",
         
     | 
| 690 | 
         
             
                                cache_examples=False
         
     | 
| 691 | 
         
             
                            )
         
     | 
| 692 | 
         
            +
                    
         
     | 
| 
         | 
|
| 
         | 
|
| 693 | 
         
             
                    # Fourth row: Generate video button
         
     | 
| 694 | 
         
             
                    with gr.Row():
         
     | 
| 695 | 
         
             
                        run_button = gr.Button("Generate Video")
         
     |