XiangpengYang committed
Commit 00b18c3 · 1 Parent(s): c7c6869

clean code

README.md CHANGED
@@ -2,9 +2,10 @@
 ## [<a href="https://knightyxp.github.io/VideoGrain_project_page/" target="_blank">Project Page</a>]
 
 [![arXiv](https://img.shields.io/badge/arXiv-2502.17258-B31B1B.svg)](https://arxiv.org/abs/2502.17258)
-[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/papers/2502.17258)
+[![HuggingFace Daily Papers Top1](https://img.shields.io/static/v1?label=HuggingFace%20Daily%20Papers&message=Top1&color=blue)](https://huggingface.co/papers/2502.17258)
 [![Project page](https://img.shields.io/badge/Project-Page-brightgreen)](https://knightyxp.github.io/VideoGrain_project_page/)
-
+![visitors](https://visitor-badge.laobi.icu/badge?page_id=knightyxp.VideoGrain&left_color=green&right_color=red)
+[![Demo Video - VideoGrain](https://img.shields.io/badge/Demo_Video-VideoGrain-red)](https://youtu.be/JKDLet618hU)
 
 <table class="center" border="1" cellspacing="0" cellpadding="5">
 <tr>
@@ -70,7 +71,7 @@ bash download_all.sh
 
 <details><summary>Click for ControlNet annotator weights (if you can not access to huggingface)</summary>
 
-You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose, cost around 4G.) from [baidu](https://pan.baidu.com/s/1sgBFLFkdTCDTn4oqHjGb9A?pwd=pdm5) or [google](https://drive.google.com/file/d/1qOsmWshnFMMr8x1HteaTViTSQLh_4rle/view?usp=drive_link)
+You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose, cost around 4G) from [baidu](https://pan.baidu.com/s/1sgBFLFkdTCDTn4oqHjGb9A?pwd=pdm5) or [google](https://drive.google.com/file/d/1qOsmWshnFMMr8x1HteaTViTSQLh_4rle/view?usp=drive_link)
 Then extract them into ./annotator/ckpts
 
 </details>
@@ -97,10 +98,10 @@ or accelerate launch test.py --config config/part_level/adding_new_object/run_tw
 ```
 result
 ├── run_two_man
-│ ├── control # control conditon
+│ ├── control # control conditon
 │ ├── infer_samples
-│ ├── input # the input video frames
-│ ├── masked_video.mp4 # check whether edit regions are accuratedly covered
+│ ├── input # the input video frames
+│ ├── masked_video.mp4 # check whether edit regions are accuratedly covered
 │ ├── sample
 │ ├── step_0 # result image folder
 │ ├── step_0.mp4 # result video
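The result tree in the hunk above also doubles as a quick sanity check for a finished run. Below is a minimal, hypothetical sketch (not part of the repository) that looks for the outputs named in the README under a result folder; the default path `result/run_two_man` follows the example above, names are searched recursively because only the entries themselves, not their exact nesting, are certain from the diff.

```python
from pathlib import Path

# Hypothetical helper (not part of the repository): confirm that an inference
# run produced the outputs listed in the README's result tree.
EXPECTED_OUTPUTS = [
    "control",           # control condition
    "infer_samples",
    "input",             # the input video frames
    "masked_video.mp4",  # preview for checking that edit regions are covered
    "sample",
    "step_0",            # result image folder
    "step_0.mp4",        # result video
]

def check_result_dir(result_root: str = "result/run_two_man") -> bool:
    root = Path(result_root)
    # rglob searches at any depth, since the exact nesting may vary per config.
    missing = [name for name in EXPECTED_OUTPUTS if not any(root.rglob(name))]
    for name in missing:
        print(f"missing expected output: {name} (under {root})")
    return not missing

if __name__ == "__main__":
    print("complete" if check_result_dir() else "incomplete or still running")
```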
annotator/dwpose/wholebody.py CHANGED
@@ -60,100 +60,4 @@ class Wholebody:
         keypoints, scores = keypoints_info[
             ..., :2], keypoints_info[..., 2]
 
-        return keypoints, scores
-
-
-
-
-# # Copyright (c) OpenMMLab. All rights reserved.
-# import numpy as np
-# from . import util
-# import cv2
-# import mmcv
-# import torch
-# import matplotlib.pyplot as plt
-# from mmpose.apis import inference_topdown
-# from mmpose.apis import init_model as init_pose_estimator
-# from mmpose.evaluation.functional import nms
-# from mmpose.utils import adapt_mmdet_pipeline
-# from mmpose.structures import merge_data_samples
-
-# from mmdet.apis import inference_detector, init_detector
-
-
-# class Wholebody:
-#     def __init__(self):
-#         device = 'cuda:0'
-#         det_config = 'annotator/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py'
-#         det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
-#         pose_config = 'annotator/dwpose/dwpose_config/dwpose-l_384x288.py'
-#         pose_ckpt = 'annotator/ckpts/dw-ll_ucoco_384.pth'
-
-#         # build detector
-#         self.detector = init_detector(det_config, det_ckpt, device=device)
-#         self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)
-
-#         # build pose estimator
-#         self.pose_estimator = init_pose_estimator(
-#             pose_config,
-#             pose_ckpt,
-#             device=device)
-
-#     def __call__(self, oriImg):
-#         # predict bbox
-#         det_result = inference_detector(self.detector, oriImg)
-#         pred_instance = det_result.pred_instances.cpu().numpy()
-#         bboxes = np.concatenate(
-#             (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
-#         bboxes = bboxes[np.logical_and(pred_instance.labels == 0,
-#                                        pred_instance.scores > 0.3)]
-#         # # max value
-#         # if len(bboxes) > 0:
-#         #     bboxes = bboxes[0].reshape(1,-1)
-#         bboxes = bboxes[nms(bboxes, 0.3), :4]
-
-#         # predict keypoints
-#         if len(bboxes) == 0:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg)
-#         else:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
-#         preds = merge_data_samples(pose_results)
-#         preds = preds.pred_instances
-
-#         # preds = pose_results[0].pred_instances
-#         keypoints = preds.get('transformed_keypoints',
-#                               preds.keypoints)
-#         if 'keypoint_scores' in preds:
-#             scores = preds.keypoint_scores
-#         else:
-#             scores = np.ones(keypoints.shape[:-1])
-
-#         if 'keypoints_visible' in preds:
-#             visible = preds.keypoints_visible
-#         else:
-#             visible = np.ones(keypoints.shape[:-1])
-#         keypoints_info = np.concatenate(
-#             (keypoints, scores[..., None], visible[..., None]),
-#             axis=-1)
-#         # compute neck joint
-#         neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
-#         # neck score when visualizing pred
-#         neck[:, 2:4] = np.logical_and(
-#             keypoints_info[:, 5, 2:4] > 0.3,
-#             keypoints_info[:, 6, 2:4] > 0.3).astype(int)
-#         new_keypoints_info = np.insert(
-#             keypoints_info, 17, neck, axis=1)
-#         mmpose_idx = [
-#             17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
-#         ]
-#         openpose_idx = [
-#             1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
-#         ]
-#         new_keypoints_info[:, openpose_idx] = \
-#             new_keypoints_info[:, mmpose_idx]
-#         keypoints_info = new_keypoints_info
-
-#         keypoints, scores, visible = keypoints_info[
-#             ..., :2], keypoints_info[..., 2], keypoints_info[..., 3]
-
-#         return keypoints, scores
+        return keypoints, scores
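For context on what the retained code computes: the `Wholebody` annotator returns per-person keypoints (remapped to OpenPose joint order in the removed reference implementation) together with per-keypoint confidence scores. A minimal usage sketch follows; the zero-argument constructor and the `(keypoints, scores)` return value are inferred from the commented-out block deleted here, so the live class may differ (for example, it may take a device argument), and the frame path is a placeholder.

```python
import cv2
from annotator.dwpose.wholebody import Wholebody

# Sketch only: constructor and call signature are inferred from the removed,
# commented-out reference implementation; verify against the current class.
estimator = Wholebody()

frame = cv2.imread("path/to/frame_00000.png")  # placeholder input frame (BGR, HxWx3)
keypoints, scores = estimator(frame)

# keypoints: (num_person, num_keypoints, 2) pixel coordinates
# scores:    (num_person, num_keypoints) confidence per keypoint
print(keypoints.shape, scores.shape)
```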
 
video_diffusion/common/image_util.py CHANGED
@@ -19,8 +19,6 @@ import torchvision.transforms.functional as F
 import random
 from scipy.ndimage import binary_dilation
 import sys
-sys.path.append('/home/xianyang/Data/code/FateZero/video_diffusion/gmflow')
-from gmflow.gmflow import GMFlow
 
 IMAGE_EXTENSION = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp", ".JPEG")
 
@@ -577,29 +575,6 @@ def sample_trajectories_new(video_path, device,height,width):
 
     clips = list(range(len(frames)))
 
-    #=============== GM-flow estimate forward optical flow============#
-    # model = GMFlow(feature_channels=128,
-    # num_scales=1,
-    # upsample_factor=8,
-    # num_head=1,
-    # attention_type='swin',
-    # ffn_dim_expansion=4,
-    # num_transformer_layers=6,
-    # ).to(device)
-    # checkpoint = torch.load('/home/xianyang/Data/code/FRESCO/model/gmflow_sintel-0c07dcb3.pth', map_location=lambda storage, loc: storage)
-    # weights = checkpoint['model'] if 'model' in checkpoint else checkpoint
-    # model.load_state_dict(weights, strict=False)
-    # model.eval()
-    # finished_trajectories = []
-
-    # current_frames, next_frames = preprocess(frames[clips[:-1]], frames[clips[1:]], transforms, height,width)
-    # results_dict = model(current_frames.to(device), next_frames.to(device), attn_splits_list=[2],
-    #                      corr_radius_list=[-1], prop_radius_list=[-1], pred_bidir_flow=True)
-    # flow_pr = results_dict['flow_preds'][-1] # [2*B, 2, H, W]
-    # fwd_flows, bwd_flows = flow_pr.chunk(2) # [B, 2, H, W]
-    # predicted_flows = fwd_flows
-    #=============== GM-flow estimate forward optical flow============#
-
     #=============== raft-large estimate forward optical flow============#
     model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
     model = model.eval()
 
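The code path kept above estimates forward optical flow with torchvision's RAFT-large instead of GMFlow. Below is a self-contained sketch of that pattern; the video path and resize size are placeholders, and the repository's `sample_trajectories_new` wires the frames, height, and width itself rather than reading a clip like this.

```python
import torch
import torchvision.transforms.functional as TF
from torchvision.io import read_video
from torchvision.models.optical_flow import Raft_Large_Weights, raft_large

device = "cuda" if torch.cuda.is_available() else "cpu"

# Same pattern as the retained branch: RAFT-large with default weights.
weights = Raft_Large_Weights.DEFAULT
model = raft_large(weights=weights, progress=False).to(device).eval()

# Placeholder clip; RAFT expects H and W divisible by 8, hence the resize.
frames, _, _ = read_video("path/to/clip.mp4", output_format="TCHW", pts_unit="sec")
frames = TF.resize(frames, size=[512, 512], antialias=False)

# Forward flow is estimated between consecutive frames: t -> t+1.
# (For long clips, process the pairs in smaller chunks to save GPU memory.)
img1, img2 = frames[:-1], frames[1:]
img1, img2 = weights.transforms()(img1, img2)  # convert to float and normalize

with torch.no_grad():
    # RAFT returns a list of iteratively refined flow fields; the last is final.
    flow_preds = model(img1.to(device), img2.to(device))
    fwd_flows = flow_preds[-1]  # [T-1, 2, H, W]

print(fwd_flows.shape)
```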