XiangpengYang committed
Commit 867af24 · 1 Parent(s): c7cf5fc
Files changed (2):
  1. README.md +9 -7
  2. config/demo_config.yaml +48 -0
README.md CHANGED
@@ -111,15 +111,17 @@ We segment videos using our ReLER lab's [SAM-Track](https://github.com/z-x-yang/
 ## 🔥 VideoGrain Editing
 
 ### Inference
+
 **Prepare config**
+
 VideoGrain is a training-free framework. To run VideoGrain, prepare your config by following these steps:
- - 1. Replace the pretrained model path and ControlNet path in your config. You can set `control_type` to `dwpose`, `depth_zoe`, or `depth` (MiDaS).
- - 2. Prepare your video frames and layout masks (edit regions) with SAM-Track or SAM2 in the dataset config.
- - 3. Change the `prompt` and extract each `local prompt` from the editing prompt. The local prompt order must match the layout mask order.
- - 4. You can change the flatten resolution with 1->64, 2->16, 4->8 (flattening at 64 usually works best).
- - 5. To improve temporal consistency, set `use_pnp: True` and `inject_step: 5-10`. (Note that more than 10 PnP steps hurts multi-region editing.)
- - 6. To visualize the cross-attention weights, set `vis_cross_attn: True`.
- - 7. To cluster the DDIM inversion spatio-temporal video features, set `cluster_inversion_feature: True`.
+ 1. Replace the pretrained model path and ControlNet path in your config. You can set `control_type` to `dwpose`, `depth_zoe`, or `depth` (MiDaS).
+ 2. Prepare your video frames and layout masks (edit regions) with SAM-Track or SAM2 in the dataset config.
+ 3. Change the `prompt` and extract each `local prompt` from the editing prompt. The local prompt order must match the layout mask order.
+ 4. You can change the flatten resolution with 1->64, 2->16, 4->8 (flattening at 64 usually works best).
+ 5. To improve temporal consistency, set `use_pnp: True` and `inject_step: 5-10`. (Note that more than 10 PnP steps hurts multi-region editing.)
+ 6. To visualize the cross-attention weights, set `vis_cross_attn: True`.
+ 7. To cluster the DDIM inversion spatio-temporal video features, set `cluster_inversion_feature: True`.
 
 ```bash
 bash test.sh
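
For orientation, the numbered steps above map onto a handful of config keys. A minimal skeleton, with key names taken from the demo config added in this commit and placeholder paths, region names, and prompts standing in for your own data, might look like this:

```yaml
# Sketch only: key names follow config/demo_config.yaml (added below);
# every value in angle brackets is a placeholder, not a real path or prompt.
pretrained_model_path: "./ckpt/stable-diffusion-v1-5"              # step 1: pretrained SD 1.5 path

control_config:
  control_type: "dwpose"                                           # step 1: or "depth_zoe" / "depth"
  pretrained_controlnet_path: "./ckpt/control_v11p_sd15_openpose"  # step 1: ControlNet path

dataset_config:
  path: "data/<your_video_frames>"                                 # step 2: extracted video frames
  layout_mask_dir: "./data/<your_layout_masks>"                    # step 2: SAM-Track / SAM2 masks
  layout_mask_order: ['<region_1>', '<region_2>']                  # step 3: same order as the local prompts
  prompt: "<source prompt describing the input video>"

editing_config:
  editing_prompts: [
    ['<full edited prompt>', '<local prompt for region_1>', '<local prompt for region_2>'],  # step 3
  ]
  flatten_res: [1]                   # step 4: 1 -> flatten at 64, 2 -> 16, 4 -> 8
  use_pnp: True                      # step 5
  inject_step: 5                     # step 5: 5-10; more than 10 hurts multi-region edits
  vis_cross_attn: True               # step 6 (optional)
  # cluster_inversion_feature: True  # step 7 (optional)
```

The demo config added below (config/demo_config.yaml) is the complete version of this skeleton for the run_two_man example.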
config/demo_config.yaml ADDED
@@ -0,0 +1,48 @@
+ pretrained_model_path: "./ckpt/stable-diffusion-v1-5"
+ logdir: ./result/run_two_man/instance_level/3cls_spider_polar_vis_cross_attn
+
+ dataset_config:
+   path: "data/run_two_man/run_two_man_fr2"
+   prompt: 'Man in red hoddie and man in gray shirt are jogging in forest'
+   n_sample_frame: 16
+   start_sample_frame: 0
+   sampling_rate: 2
+   layout_mask_dir: "./data/run_two_man/layout_masks_fr2"
+   layout_mask_order: ['left_man_plus','right_man_plus','trees','trunk']
+   negative_promot: "ugly, blurry, low res, unrealistic, unaesthetic"
+
+ control_config:
+   control_type: "dwpose"
+   pretrained_controlnet_path: "./ckpt/control_v11p_sd15_openpose"
+   controlnet_conditioning_scale: 1.0
+   hand: True
+   face: False
+
+ editing_config:
+   use_invertion_latents: true
+   inject_step: 0
+   old_qk: 1
+   flatten_res: [1]
+   guidance_scale: 7.5
+   use_pnp: false
+   use_freeu: false
+   editing_prompts: [
+     ['Spiderman and Polar Bear are jogging under cherry trees','man','Polar Bear','cherry trees',''],
+   ]
+
+   clip_length: "${..dataset_config.n_sample_frame}"
+   sample_seeds: [0]
+   num_inference_steps: 50
+   blending_percentage: 0
+   vis_cross_attn: True
+   #cluster_inversion_feature: True
+
+
+ test_pipeline_config:
+   target: video_diffusion.pipelines.ddim_spatial_temporal.DDIMSpatioTemporalStableDiffusionPipeline
+   num_inference_steps: "${..validation_sample_logger.num_inference_steps}"
+
+
+
+ seed: 42
+
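
One note on step 1 of the README instructions: switching from pose to depth control should only touch `control_config`. The sketch below keeps the key names from the demo config above; the depth ControlNet checkpoint path is an assumption and must point at whatever checkpoint you actually have under `./ckpt`.

```yaml
# Sketch, not part of the commit: swaps dwpose control for depth control.
# The checkpoint path below is an assumed example; use your local depth ControlNet.
control_config:
  control_type: "depth_zoe"          # or "depth" for the MiDaS-based estimator
  pretrained_controlnet_path: "./ckpt/control_v11f1p_sd15_depth"
  controlnet_conditioning_scale: 1.0
  # hand/face flags (used with dwpose in the demo config) presumably don't apply to depth and are omitted
```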