Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Upload 2 files
Browse files
- demo/MotionDirector_gradio.py +35 -12
- demo/motiondirector.py +17 -17
    	
        demo/MotionDirector_gradio.py
    CHANGED
    
    | @@ -21,12 +21,14 @@ with gr.Blocks() as demo: | |
| 21 | 
             
                    </a>
         | 
| 22 | 
             
                    <div>
         | 
| 23 | 
             
                        <h1 >MotionDirector: Motion Customization of Text-to-Video Diffusion Models</h1>
         | 
| 24 | 
            -
                        <h5 style="margin: 0;">More MotionDirectors are on the way. Stay tuned  | 
|  | |
| 25 | 
             
                        </br>
         | 
| 26 | 
             
                        <div style="display: flex; justify-content: center; align-items: center; text-align: center;>
         | 
| 27 | 
            -
                            <a href="https://arxiv.org/abs/2310.08465" | 
| 28 | 
            -
                            <a href= | 
| 29 | 
            -
                            <a href= | 
|  | |
| 30 | 
             
                        </div>
         | 
| 31 | 
             
                    </div>
         | 
| 32 | 
             
                    </div>
         | 
| @@ -43,15 +45,24 @@ with gr.Blocks() as demo: | |
| 43 |  | 
| 44 | 
             
                with gr.Row():
         | 
| 45 | 
             
                    model_select = gr.Dropdown(
         | 
| 46 | 
            -
                        ["1-1: [Cinematic Shots] --  | 
| 47 | 
             
                         "1-2: [Cinematic Shots] -- Zoom In",
         | 
| 48 | 
             
                         "1-3: [Cinematic Shots] -- Zoom Out",
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 49 | 
             
                         "2-1: [Object Trajectory] -- Right to Left",
         | 
| 50 | 
             
                         "2-2: [Object Trajectory] -- Left to Right",
         | 
| 51 | 
             
                         "3-1: [Sports Concepts] -- Riding Bicycle",
         | 
| 52 | 
             
                         "3-2: [Sports Concepts] -- Riding Horse",
         | 
| 53 | 
             
                         "3-3: [Sports Concepts] -- Lifting Weights",
         | 
| 54 | 
            -
                         "3-4: [Sports Concepts] -- Playing Golf"
         | 
|  | |
| 55 | 
             
                         ],
         | 
| 56 | 
             
                        label="MotionDirector",
         | 
| 57 | 
             
                        info="Which MotionDirector would you like to use!"
         | 
| @@ -74,19 +85,31 @@ with gr.Blocks() as demo: | |
| 74 | 
             
                gr.Examples(
         | 
| 75 | 
             
                    fn=motiondirector,
         | 
| 76 | 
             
                    examples=[
         | 
| 77 | 
            -
                        ["1-1: [Cinematic Shots] --  | 
| 78 | 
            -
             | 
| 79 | 
            -
                        ["1- | 
| 80 | 
            -
                        [" | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 81 | 
             
                        ["2-2: [Object Trajectory] -- Left to Right", "A tiger is running in the forest.", 3463673],
         | 
| 82 | 
             
                        ["3-1: [Sports Concepts] -- Riding Bicycle", "An astronaut is riding a bicycle past the pyramids Mars 4K high quailty highly detailed.", 4422954],
         | 
| 83 | 
             
                        ["3-2: [Sports Concepts] -- Riding Horse", "A man riding an elephant through the jungle.", 6230765],
         | 
| 84 | 
             
                        ["3-3: [Sports Concepts] -- Lifting Weights", "A panda is lifting weights in a garden.", 1699276],
         | 
| 85 | 
            -
                        ["3-4: [Sports Concepts] -- Playing Golf", "A  | 
|  | |
| 86 | 
             
                    ],
         | 
| 87 | 
             
                    inputs=[model_select, text_pormpt, random_seed],
         | 
| 88 | 
             
                    outputs=generated_video,
         | 
| 89 | 
             
                )
         | 
| 90 |  | 
| 91 | 
             
            demo.queue(max_size=15)
         | 
| 92 | 
            -
            demo.launch(share= | 
|  | |
| 21 | 
             
                    </a>
         | 
| 22 | 
             
                    <div>
         | 
| 23 | 
             
                        <h1 >MotionDirector: Motion Customization of Text-to-Video Diffusion Models</h1>
         | 
| 24 | 
            +
                        <h5 style="margin: 0;">More MotionDirectors are on the way. Stay tuned 🔥!</h5>
         | 
| 25 | 
            +
                         <h5 style="margin: 0;"> If you like our project, please give us a star ✨ on Github for the latest update.</h5>
         | 
| 26 | 
             
                        </br>
         | 
| 27 | 
             
                        <div style="display: flex; justify-content: center; align-items: center; text-align: center;>
         | 
| 28 | 
            +
                            <a href="https://arxiv.org/abs/2310.08465"></a>
         | 
| 29 | 
            +
                            <a href="https://arxiv.org/abs/2310.08465"><img src="https://img.shields.io/badge/arXiv-2310.08465-b31b1b.svg"></a>  
         | 
| 30 | 
            +
                            <a href="https://showlab.github.io/MotionDirector"><img src="https://img.shields.io/badge/Project_Page-MotionDirector-green"></a>  
         | 
| 31 | 
            +
                            <a href="https://github.com/showlab/MotionDirector"><img src="https://img.shields.io/badge/Github-Code-blue"></a>  
         | 
| 32 | 
             
                        </div>
         | 
| 33 | 
             
                    </div>
         | 
| 34 | 
             
                    </div>
         | 
|  | |
| 45 |  | 
| 46 | 
             
                with gr.Row():
         | 
| 47 | 
             
                    model_select = gr.Dropdown(
         | 
| 48 | 
            +
                        ["1-1: [Cinematic Shots] -- Zoom Out",
         | 
| 49 | 
             
                         "1-2: [Cinematic Shots] -- Zoom In",
         | 
| 50 | 
             
                         "1-3: [Cinematic Shots] -- Zoom Out",
         | 
| 51 | 
            +
                         "1-3: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 1",
         | 
| 52 | 
            +
                         "1-4: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 2",
         | 
| 53 | 
            +
                         "1-5: [Cinematic Shots] -- Follow",
         | 
| 54 | 
            +
                         "1-6: [Cinematic Shots] -- Reverse Follow",
         | 
| 55 | 
            +
                         "1-7: [Cinematic Shots] -- Chest Transition",
         | 
| 56 | 
            +
                         "1-8: [Cinematic Shots] -- Mini Jib Reveal",
         | 
| 57 | 
            +
                         "1-9: [Cinematic Shots] -- Orbit",
         | 
| 58 | 
            +
                         "1-10: [Cinematic Shots] -- Pull Back",
         | 
| 59 | 
             
                         "2-1: [Object Trajectory] -- Right to Left",
         | 
| 60 | 
             
                         "2-2: [Object Trajectory] -- Left to Right",
         | 
| 61 | 
             
                         "3-1: [Sports Concepts] -- Riding Bicycle",
         | 
| 62 | 
             
                         "3-2: [Sports Concepts] -- Riding Horse",
         | 
| 63 | 
             
                         "3-3: [Sports Concepts] -- Lifting Weights",
         | 
| 64 | 
            +
                         "3-4: [Sports Concepts] -- Playing Golf",
         | 
| 65 | 
            +
                         "3-5: [Sports Concepts] -- Skateboarding",
         | 
| 66 | 
             
                         ],
         | 
| 67 | 
             
                        label="MotionDirector",
         | 
| 68 | 
             
                        info="Which MotionDirector would you like to use!"
         | 
|  | |
| 85 | 
             
                gr.Examples(
         | 
| 86 | 
             
                    fn=motiondirector,
         | 
| 87 | 
             
                    examples=[
         | 
| 88 | 
            +
                        ["1-1: [Cinematic Shots] -- Zoom Out", "A spaceman standing on the moon captured with a zoom out.",
         | 
| 89 | 
            +
                         8323920],
         | 
| 90 | 
            +
                        ["1-2: [Cinematic Shots] -- Zoom In", "A polar bear standing at the top of a snowy mountain captured with a zoom in.", 7938587],
         | 
| 91 | 
            +
                        ["1-3: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 1", "A panda standing in front of an ancient Chinese temple captured with a dolly zoom.", 8238823],
         | 
| 92 | 
            +
                        ["1-4: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 2", "A lion sitting on top of a cliff captured with a dolly zoom.", 1675932],
         | 
| 93 | 
            +
                        ["1-5: [Cinematic Shots] -- Follow", "A fireman is walking through fire captured with a follow cinematic shot.", 2927089],
         | 
| 94 | 
            +
                        ["1-6: [Cinematic Shots] -- Reverse Follow", "A fireman is walking through fire captured with a reverse follow cinematic shot.", 9759630],
         | 
| 95 | 
            +
                        ["1-7: [Cinematic Shots] -- Chest Transition", "An ancient Roman soldier walks through the crowd on the street captured with a chest transition cinematic shot.", 3982271],
         | 
| 96 | 
            +
                        ["1-8: [Cinematic Shots] -- Mini Jib Reveal",
         | 
| 97 | 
            +
                         "A British Redcoat soldier is walking through the mountains captured with a mini jib reveal cinematic shot.",
         | 
| 98 | 
            +
                         566917],
         | 
| 99 | 
            +
                        ["1-9: [Cinematic Shots] -- Orbit", "A spaceman on the moon captured with an orbit cinematic shot.", 5899496],
         | 
| 100 | 
            +
                        ["1-10: [Cinematic Shots] -- Pull Back", "A spaceman on the moon looking at a lunar rover captured with a pull back cinematic shot.",
         | 
| 101 | 
            +
                         5585865],
         | 
| 102 | 
            +
                        ["2-1: [Object Trajectory] -- Right to Left", "A tank is running on the moon.", 2047046],
         | 
| 103 | 
             
                        ["2-2: [Object Trajectory] -- Left to Right", "A tiger is running in the forest.", 3463673],
         | 
| 104 | 
             
                        ["3-1: [Sports Concepts] -- Riding Bicycle", "An astronaut is riding a bicycle past the pyramids Mars 4K high quailty highly detailed.", 4422954],
         | 
| 105 | 
             
                        ["3-2: [Sports Concepts] -- Riding Horse", "A man riding an elephant through the jungle.", 6230765],
         | 
| 106 | 
             
                        ["3-3: [Sports Concepts] -- Lifting Weights", "A panda is lifting weights in a garden.", 1699276],
         | 
| 107 | 
            +
                        ["3-4: [Sports Concepts] -- Playing Golf", "A monkey is playing golf on a field full of flowers.", 4156856],
         | 
| 108 | 
            +
                        ["3-5: [Sports Concepts] -- Skateboarding", "An astronaut is skateboarding on Mars.", 6615212],
         | 
| 109 | 
             
                    ],
         | 
| 110 | 
             
                    inputs=[model_select, text_pormpt, random_seed],
         | 
| 111 | 
             
                    outputs=generated_video,
         | 
| 112 | 
             
                )
         | 
| 113 |  | 
| 114 | 
             
            demo.queue(max_size=15)
         | 
| 115 | 
            +
            demo.launch(share=True)
         | 
    	
        demo/motiondirector.py
    CHANGED
    
    | @@ -85,11 +85,25 @@ def prepare_input_latents( | |
| 85 | 
             
                height: int,
         | 
| 86 | 
             
                width: int,
         | 
| 87 | 
             
                latents_path:str,
         | 
| 88 | 
            -
                 | 
|  | |
| 89 | 
             
            ):
         | 
| 90 | 
             
                # initialize with random gaussian noise
         | 
| 91 | 
             
                scale = pipe.vae_scale_factor
         | 
| 92 | 
             
                shape = (batch_size, pipe.unet.config.in_channels, num_frames, height // scale, width // scale)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 93 | 
             
                if noise_prior > 0.:
         | 
| 94 | 
             
                    cached_latents = torch.load(latents_path)
         | 
| 95 | 
             
                    if 'inversion_noise' not in cached_latents:
         | 
| @@ -139,20 +153,6 @@ class MotionDirector(): | |
| 139 | 
             
                    latents_path = f"{latents_folder}/{random.choice(os.listdir(latents_folder))}"
         | 
| 140 | 
             
                    assert os.path.exists(lora_path)
         | 
| 141 |  | 
| 142 | 
            -
                    if '1-' in model_select:
         | 
| 143 | 
            -
                        noise_prior = 0.3
         | 
| 144 | 
            -
                    elif '2-' in model_select:
         | 
| 145 | 
            -
                        noise_prior = 0.5
         | 
| 146 | 
            -
                    elif '3-' in model_select:
         | 
| 147 | 
            -
                        noise_prior = 0.
         | 
| 148 | 
            -
                    else:
         | 
| 149 | 
            -
                        noise_prior = 0.
         | 
| 150 | 
            -
             | 
| 151 | 
            -
                    if random_seed > 1000:
         | 
| 152 | 
            -
                        torch.manual_seed(random_seed)
         | 
| 153 | 
            -
                    else:
         | 
| 154 | 
            -
                        random_seed = random.randint(100, 10000000)
         | 
| 155 | 
            -
                        torch.manual_seed(random_seed)
         | 
| 156 | 
             
                    device = "cuda"
         | 
| 157 | 
             
                    with torch.autocast(device, dtype=torch.half):
         | 
| 158 | 
             
                        # prepare input latents
         | 
| @@ -164,7 +164,8 @@ class MotionDirector(): | |
| 164 | 
             
                                height=384,
         | 
| 165 | 
             
                                width=384,
         | 
| 166 | 
             
                                latents_path=latents_path,
         | 
| 167 | 
            -
                                 | 
|  | |
| 168 | 
             
                            )
         | 
| 169 | 
             
                            video_frames = self.pipe(
         | 
| 170 | 
             
                                prompt=text_pormpt,
         | 
| @@ -177,7 +178,6 @@ class MotionDirector(): | |
| 177 | 
             
                                latents=init_latents
         | 
| 178 | 
             
                            ).frames
         | 
| 179 |  | 
| 180 | 
            -
             | 
| 181 | 
             
                            out_file = f"{out_name}_{random_seed}.mp4"
         | 
| 182 | 
             
                            os.makedirs(os.path.dirname(out_file), exist_ok=True)
         | 
| 183 | 
             
                            export_to_video(video_frames, out_file, 8)
         | 
|  | |
| 85 | 
             
                height: int,
         | 
| 86 | 
             
                width: int,
         | 
| 87 | 
             
                latents_path:str,
         | 
| 88 | 
            +
                model_select: str,
         | 
| 89 | 
            +
                random_seed: int,
         | 
| 90 | 
             
            ):
         | 
| 91 | 
             
                # initialize with random gaussian noise
         | 
| 92 | 
             
                scale = pipe.vae_scale_factor
         | 
| 93 | 
             
                shape = (batch_size, pipe.unet.config.in_channels, num_frames, height // scale, width // scale)
         | 
| 94 | 
            +
                if random_seed > 1000:
         | 
| 95 | 
            +
                    torch.manual_seed(random_seed)
         | 
| 96 | 
            +
                else:
         | 
| 97 | 
            +
                    random_seed = random.randint(100, 10000000)
         | 
| 98 | 
            +
                    torch.manual_seed(random_seed)
         | 
| 99 | 
            +
                if '1-' in model_select:
         | 
| 100 | 
            +
                    noise_prior = 0.3
         | 
| 101 | 
            +
                elif '2-' in model_select:
         | 
| 102 | 
            +
                    noise_prior = 0.5
         | 
| 103 | 
            +
                elif '3-' in model_select:
         | 
| 104 | 
            +
                    noise_prior = 0.
         | 
| 105 | 
            +
                else:
         | 
| 106 | 
            +
                    noise_prior = 0.
         | 
| 107 | 
             
                if noise_prior > 0.:
         | 
| 108 | 
             
                    cached_latents = torch.load(latents_path)
         | 
| 109 | 
             
                    if 'inversion_noise' not in cached_latents:
         | 
|  | |
| 153 | 
             
                    latents_path = f"{latents_folder}/{random.choice(os.listdir(latents_folder))}"
         | 
| 154 | 
             
                    assert os.path.exists(lora_path)
         | 
| 155 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 156 | 
             
                    device = "cuda"
         | 
| 157 | 
             
                    with torch.autocast(device, dtype=torch.half):
         | 
| 158 | 
             
                        # prepare input latents
         | 
|  | |
| 164 | 
             
                                height=384,
         | 
| 165 | 
             
                                width=384,
         | 
| 166 | 
             
                                latents_path=latents_path,
         | 
| 167 | 
            +
                                model_select=model_select,
         | 
| 168 | 
            +
                                random_seed=random_seed
         | 
| 169 | 
             
                            )
         | 
| 170 | 
             
                            video_frames = self.pipe(
         | 
| 171 | 
             
                                prompt=text_pormpt,
         | 
|  | |
| 178 | 
             
                                latents=init_latents
         | 
| 179 | 
             
                            ).frames
         | 
| 180 |  | 
|  | |
| 181 | 
             
                            out_file = f"{out_name}_{random_seed}.mp4"
         | 
| 182 | 
             
                            os.makedirs(os.path.dirname(out_file), exist_ok=True)
         | 
| 183 | 
             
                            export_to_video(video_frames, out_file, 8)
         | 
