Commit 00b18c3 · Parent(s): c7c6869
clean code
Files changed:
- README.md +7 -6
- annotator/dwpose/wholebody.py +1 -97
- video_diffusion/common/image_util.py +0 -25
README.md
CHANGED

@@ -2,9 +2,10 @@
 ## [<a href="https://knightyxp.github.io/VideoGrain_project_page/" target="_blank">Project Page</a>]
 
 [](https://arxiv.org/abs/2502.17258)
-[](https://knightyxp.github.io/VideoGrain_project_page/)
+[](https://huggingface.co/papers/2502.17258)
 [](https://knightyxp.github.io/VideoGrain_project_page/)
+[](https://youtu.be/JKDLet618hU)
 
 <table class="center" border="1" cellspacing="0" cellpadding="5">
 <tr>

@@ -70,7 +71,7 @@ bash download_all.sh
 
 <details><summary>Click for ControlNet annotator weights (if you can not access to huggingface)</summary>
 
-You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose, cost around 4G
+You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose, cost around 4G) from [baidu](https://pan.baidu.com/s/1sgBFLFkdTCDTn4oqHjGb9A?pwd=pdm5) or [google](https://drive.google.com/file/d/1qOsmWshnFMMr8x1HteaTViTSQLh_4rle/view?usp=drive_link)
 Then extract them into ./annotator/ckpts
 
 </details>

@@ -97,10 +98,10 @@ or accelerate launch test.py --config config/part_level/adding_new_object/run_tw
 ```
 result
 ├── run_two_man
-│   ├── control
+│   ├── control                # control condition
 │   ├── infer_samples
-│   ├── input
-│   ├── masked_video.mp4
+│   ├── input                  # the input video frames
+│   ├── masked_video.mp4       # check whether edit regions are accurately covered
 │   ├── sample
 │   ├── step_0                 # result image folder
 │   ├── step_0.mp4             # result video
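A note on the annotator-weights hunk: after downloading the archive from the baidu or google link, the checkpoints only need to be unpacked so they sit under ./annotator/ckpts. A minimal Python sketch (the archive filename below is a placeholder for whatever the link serves; a tar or 7z archive would need the corresponding module instead of zipfile):

```python
# Unpack the downloaded annotator checkpoints into ./annotator/ckpts.
# "annotator_ckpts.zip" is a placeholder name, not the actual file behind the drive links.
import zipfile
from pathlib import Path

archive = Path("annotator_ckpts.zip")
target = Path("annotator/ckpts")
target.mkdir(parents=True, exist_ok=True)

with zipfile.ZipFile(archive) as zf:
    zf.extractall(target)
```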
annotator/dwpose/wholebody.py
CHANGED

@@ -60,100 +60,4 @@ class Wholebody:
         keypoints, scores = keypoints_info[
             ..., :2], keypoints_info[..., 2]
 
-        return keypoints, scores
-
-
-
-
-# # Copyright (c) OpenMMLab. All rights reserved.
-# import numpy as np
-# from . import util
-# import cv2
-# import mmcv
-# import torch
-# import matplotlib.pyplot as plt
-# from mmpose.apis import inference_topdown
-# from mmpose.apis import init_model as init_pose_estimator
-# from mmpose.evaluation.functional import nms
-# from mmpose.utils import adapt_mmdet_pipeline
-# from mmpose.structures import merge_data_samples
-
-# from mmdet.apis import inference_detector, init_detector
-
-
-# class Wholebody:
-#     def __init__(self):
-#         device = 'cuda:0'
-#         det_config = 'annotator/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py'
-#         det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
-#         pose_config = 'annotator/dwpose/dwpose_config/dwpose-l_384x288.py'
-#         pose_ckpt = 'annotator/ckpts/dw-ll_ucoco_384.pth'
-
-#         # build detector
-#         self.detector = init_detector(det_config, det_ckpt, device=device)
-#         self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)
-
-#         # build pose estimator
-#         self.pose_estimator = init_pose_estimator(
-#             pose_config,
-#             pose_ckpt,
-#             device=device)
-
-#     def __call__(self, oriImg):
-#         # predict bbox
-#         det_result = inference_detector(self.detector, oriImg)
-#         pred_instance = det_result.pred_instances.cpu().numpy()
-#         bboxes = np.concatenate(
-#             (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
-#         bboxes = bboxes[np.logical_and(pred_instance.labels == 0,
-#                                        pred_instance.scores > 0.3)]
-#         # # max value
-#         # if len(bboxes) > 0:
-#         #     bboxes = bboxes[0].reshape(1,-1)
-#         bboxes = bboxes[nms(bboxes, 0.3), :4]
-
-#         # predict keypoints
-#         if len(bboxes) == 0:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg)
-#         else:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
-#         preds = merge_data_samples(pose_results)
-#         preds = preds.pred_instances
-
-#         # preds = pose_results[0].pred_instances
-#         keypoints = preds.get('transformed_keypoints',
-#                               preds.keypoints)
-#         if 'keypoint_scores' in preds:
-#             scores = preds.keypoint_scores
-#         else:
-#             scores = np.ones(keypoints.shape[:-1])
-
-#         if 'keypoints_visible' in preds:
-#             visible = preds.keypoints_visible
-#         else:
-#             visible = np.ones(keypoints.shape[:-1])
-#         keypoints_info = np.concatenate(
-#             (keypoints, scores[..., None], visible[..., None]),
-#             axis=-1)
-#         # compute neck joint
-#         neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
-#         # neck score when visualizing pred
-#         neck[:, 2:4] = np.logical_and(
-#             keypoints_info[:, 5, 2:4] > 0.3,
-#             keypoints_info[:, 6, 2:4] > 0.3).astype(int)
-#         new_keypoints_info = np.insert(
-#             keypoints_info, 17, neck, axis=1)
-#         mmpose_idx = [
-#             17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
-#         ]
-#         openpose_idx = [
-#             1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
-#         ]
-#         new_keypoints_info[:, openpose_idx] = \
-#             new_keypoints_info[:, mmpose_idx]
-#         keypoints_info = new_keypoints_info
-
-#         keypoints, scores, visible = keypoints_info[
-#             ..., :2], keypoints_info[..., 2], keypoints_info[..., 3]
-
-#         return keypoints, scores
+        return keypoints, scores
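With the commented-out mmpose/mmdet variant gone, the module's surface is just the DW-Pose path the file keeps: Wholebody.__call__ takes a frame and returns a (keypoints, scores) pair, exactly as the retained lines show. A hypothetical usage sketch (the no-argument constructor and the frame filename are assumptions, since the kept __init__ lies outside this hunk):

```python
# Hypothetical usage of the cleaned-up Wholebody wrapper; the constructor arguments
# and the frame filename are assumptions not shown in this diff.
import cv2
from annotator.dwpose.wholebody import Wholebody

detector = Wholebody()                 # expects the DW-Pose checkpoints under annotator/ckpts
frame = cv2.imread("frame_0000.png")   # any H x W x 3 BGR frame from the input video
keypoints, scores = detector(frame)    # keypoints: (people, joints, 2); scores: (people, joints)
```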
video_diffusion/common/image_util.py
CHANGED

@@ -19,8 +19,6 @@ import torchvision.transforms.functional as F
 import random
 from scipy.ndimage import binary_dilation
 import sys
-sys.path.append('/home/xianyang/Data/code/FateZero/video_diffusion/gmflow')
-from gmflow.gmflow import GMFlow
 
 IMAGE_EXTENSION = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp", ".JPEG")
 

@@ -577,29 +575,6 @@ def sample_trajectories_new(video_path, device,height,width):
 
     clips = list(range(len(frames)))
 
-    #=============== GM-flow estimate forward optical flow============#
-    # model = GMFlow(feature_channels=128,
-    #                num_scales=1,
-    #                upsample_factor=8,
-    #                num_head=1,
-    #                attention_type='swin',
-    #                ffn_dim_expansion=4,
-    #                num_transformer_layers=6,
-    #                ).to(device)
-    # checkpoint = torch.load('/home/xianyang/Data/code/FRESCO/model/gmflow_sintel-0c07dcb3.pth', map_location=lambda storage, loc: storage)
-    # weights = checkpoint['model'] if 'model' in checkpoint else checkpoint
-    # model.load_state_dict(weights, strict=False)
-    # model.eval()
-    # finished_trajectories = []
-
-    # current_frames, next_frames = preprocess(frames[clips[:-1]], frames[clips[1:]], transforms, height,width)
-    # results_dict = model(current_frames.to(device), next_frames.to(device), attn_splits_list=[2],
-    #                      corr_radius_list=[-1], prop_radius_list=[-1], pred_bidir_flow=True)
-    # flow_pr = results_dict['flow_preds'][-1] # [2*B, 2, H, W]
-    # fwd_flows, bwd_flows = flow_pr.chunk(2) # [B, 2, H, W]
-    # predicted_flows = fwd_flows
-    #=============== GM-flow estimate forward optical flow============#
-
     #=============== raft-large estimate forward optical flow============#
     model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
     model = model.eval()
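With the GMFlow branch removed, sample_trajectories_new estimates forward optical flow only through torchvision's RAFT-large, as the retained lines show. A self-contained sketch of that call pattern (the [-1, 1] frame scaling and (B, 3, H, W) batching are assumptions about what the repo's own preprocess helper produces):

```python
# Sketch of the retained RAFT-large forward-flow path using torchvision's optical-flow API.
# Frames are assumed to be (B, 3, H, W) float tensors scaled to [-1, 1],
# with H and W divisible by 8.
import torch
from torchvision.models.optical_flow import raft_large, Raft_Large_Weights

device = "cuda" if torch.cuda.is_available() else "cpu"
model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device).eval()

@torch.no_grad()
def forward_flow(current_frames: torch.Tensor, next_frames: torch.Tensor) -> torch.Tensor:
    # RAFT returns a list of iteratively refined flow fields; the last entry is the final estimate.
    flow_predictions = model(current_frames.to(device), next_frames.to(device))
    return flow_predictions[-1]  # (B, 2, H, W): per-pixel (dx, dy) displacement to the next frame
```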