Commit 00b18c3 · Parent: c7c6869

clean code

Files changed:
- README.md +7 -6
- annotator/dwpose/wholebody.py +1 -97
- video_diffusion/common/image_util.py +0 -25
README.md
CHANGED
@@ -2,9 +2,10 @@
 ## [<a href="https://knightyxp.github.io/VideoGrain_project_page/" target="_blank">Project Page</a>]
 
 [](https://arxiv.org/abs/2502.17258)
-[](https://knightyxp.github.io/VideoGrain_project_page/)
-
+[](https://huggingface.co/papers/2502.17258)
 [](https://knightyxp.github.io/VideoGrain_project_page/)
+
+[](https://youtu.be/JKDLet618hU)
 
 <table class="center" border="1" cellspacing="0" cellpadding="5">
 <tr>
@@ -70,7 +71,7 @@ bash download_all.sh
 
 <details><summary>Click for ControlNet annotator weights (if you can not access to huggingface)</summary>
 
-You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose, cost around 4G
+You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose; around 4G in total) from [baidu](https://pan.baidu.com/s/1sgBFLFkdTCDTn4oqHjGb9A?pwd=pdm5) or [google](https://drive.google.com/file/d/1qOsmWshnFMMr8x1HteaTViTSQLh_4rle/view?usp=drive_link)
 Then extract them into ./annotator/ckpts
 
 </details>
@@ -97,10 +98,10 @@ or accelerate launch test.py --config config/part_level/adding_new_object/run_tw
 ```
 result
 ├── run_two_man
-│   ├── control
+│   ├── control # control condition
 │   ├── infer_samples
-│   ├── input
-│   ├── masked_video.mp4
+│   ├── input # the input video frames
+│   ├── masked_video.mp4 # check whether edit regions are accurately covered
 │   ├── sample
 │       ├── step_0 # result image folder
 │       ├── step_0.mp4 # result video
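The hunk above adds mirror links for the annotator weights, which are then extracted into `./annotator/ckpts`. As a quick sanity check after extraction, a minimal sketch along these lines could confirm the layout; the helper name is hypothetical, and the only filename taken from this commit is `dw-ll_ucoco_384.pth`, the DW-Pose weight referenced in `annotator/dwpose/wholebody.py`:

```python
from pathlib import Path

def check_annotator_ckpts(ckpt_dir: str = "annotator/ckpts") -> None:
    """Hypothetical helper: list extracted annotator checkpoints and warn if DW-Pose weights are missing."""
    root = Path(ckpt_dir)
    if not root.is_dir():
        raise FileNotFoundError(f"{root} not found; extract the downloaded archive here")
    files = sorted(p.name for p in root.rglob("*") if p.is_file())
    print(f"found {len(files)} checkpoint file(s) under {root}")
    # dw-ll_ucoco_384.pth is the DW-Pose checkpoint path referenced by annotator/dwpose/wholebody.py
    if "dw-ll_ucoco_384.pth" not in files:
        print("warning: dw-ll_ucoco_384.pth is missing; the DW-Pose annotator will not load")

if __name__ == "__main__":
    check_annotator_ckpts()
```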
annotator/dwpose/wholebody.py
CHANGED
@@ -60,100 +60,4 @@ class Wholebody:
         keypoints, scores = keypoints_info[
             ..., :2], keypoints_info[..., 2]
 
-        return keypoints, scores
-
-
-
-
-# # Copyright (c) OpenMMLab. All rights reserved.
-# import numpy as np
-# from . import util
-# import cv2
-# import mmcv
-# import torch
-# import matplotlib.pyplot as plt
-# from mmpose.apis import inference_topdown
-# from mmpose.apis import init_model as init_pose_estimator
-# from mmpose.evaluation.functional import nms
-# from mmpose.utils import adapt_mmdet_pipeline
-# from mmpose.structures import merge_data_samples
-
-# from mmdet.apis import inference_detector, init_detector
-
-
-# class Wholebody:
-#     def __init__(self):
-#         device = 'cuda:0'
-#         det_config = 'annotator/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py'
-#         det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
-#         pose_config = 'annotator/dwpose/dwpose_config/dwpose-l_384x288.py'
-#         pose_ckpt = 'annotator/ckpts/dw-ll_ucoco_384.pth'
-
-#         # build detector
-#         self.detector = init_detector(det_config, det_ckpt, device=device)
-#         self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)
-
-#         # build pose estimator
-#         self.pose_estimator = init_pose_estimator(
-#             pose_config,
-#             pose_ckpt,
-#             device=device)
-
-#     def __call__(self, oriImg):
-#         # predict bbox
-#         det_result = inference_detector(self.detector, oriImg)
-#         pred_instance = det_result.pred_instances.cpu().numpy()
-#         bboxes = np.concatenate(
-#             (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
-#         bboxes = bboxes[np.logical_and(pred_instance.labels == 0,
-#                                        pred_instance.scores > 0.3)]
-#         # # max value
-#         # if len(bboxes) > 0:
-#         #     bboxes = bboxes[0].reshape(1,-1)
-#         bboxes = bboxes[nms(bboxes, 0.3), :4]
-
-#         # predict keypoints
-#         if len(bboxes) == 0:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg)
-#         else:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
-#         preds = merge_data_samples(pose_results)
-#         preds = preds.pred_instances
-
-#         # preds = pose_results[0].pred_instances
-#         keypoints = preds.get('transformed_keypoints',
-#                               preds.keypoints)
-#         if 'keypoint_scores' in preds:
-#             scores = preds.keypoint_scores
-#         else:
-#             scores = np.ones(keypoints.shape[:-1])
-
-#         if 'keypoints_visible' in preds:
-#             visible = preds.keypoints_visible
-#         else:
-#             visible = np.ones(keypoints.shape[:-1])
-#         keypoints_info = np.concatenate(
-#             (keypoints, scores[..., None], visible[..., None]),
-#             axis=-1)
-#         # compute neck joint
-#         neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
-#         # neck score when visualizing pred
-#         neck[:, 2:4] = np.logical_and(
-#             keypoints_info[:, 5, 2:4] > 0.3,
-#             keypoints_info[:, 6, 2:4] > 0.3).astype(int)
-#         new_keypoints_info = np.insert(
-#             keypoints_info, 17, neck, axis=1)
-#         mmpose_idx = [
-#             17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
-#         ]
-#         openpose_idx = [
-#             1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
-#         ]
-#         new_keypoints_info[:, openpose_idx] = \
-#             new_keypoints_info[:, mmpose_idx]
-#         keypoints_info = new_keypoints_info
-
-#         keypoints, scores, visible = keypoints_info[
-#             ..., :2], keypoints_info[..., 2], keypoints_info[..., 3]
-
-#         return keypoints, scores
+        return keypoints, scores
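The commit removes the commented-out mmpose/mmdet reference implementation and keeps only the active return path, so the class still hands back per-person keypoint coordinates plus confidence scores. A minimal usage sketch of that contract, assuming the live `Wholebody` class still takes no constructor arguments and accepts a frame as a NumPy array (as the deleted reference code did), with the shapes inferred from `keypoints_info[..., :2]` / `[..., 2]`:

```python
import numpy as np
from annotator.dwpose.wholebody import Wholebody  # module path as in this commit

estimator = Wholebody()  # assumed no-arg constructor, mirroring the deleted reference code
frame = np.zeros((512, 512, 3), dtype=np.uint8)  # placeholder frame

# keypoints: (num_people, num_keypoints, 2) x/y coordinates
# scores:    (num_people, num_keypoints) per-keypoint confidences
keypoints, scores = estimator(frame)

# keep only confidently detected joints, reusing the 0.3 threshold from the reference code
confident = scores > 0.3
print("people:", keypoints.shape[0], "confident joints:", int(confident.sum()))
```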
video_diffusion/common/image_util.py
CHANGED
@@ -19,8 +19,6 @@ import torchvision.transforms.functional as F
 import random
 from scipy.ndimage import binary_dilation
 import sys
-sys.path.append('/home/xianyang/Data/code/FateZero/video_diffusion/gmflow')
-from gmflow.gmflow import GMFlow
 
 IMAGE_EXTENSION = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp", ".JPEG")
 
@@ -577,29 +575,6 @@ def sample_trajectories_new(video_path, device,height,width):
 
     clips = list(range(len(frames)))
 
-    #=============== GM-flow estimate forward optical flow============#
-    # model = GMFlow(feature_channels=128,
-    #                num_scales=1,
-    #                upsample_factor=8,
-    #                num_head=1,
-    #                attention_type='swin',
-    #                ffn_dim_expansion=4,
-    #                num_transformer_layers=6,
-    #                ).to(device)
-    # checkpoint = torch.load('/home/xianyang/Data/code/FRESCO/model/gmflow_sintel-0c07dcb3.pth', map_location=lambda storage, loc: storage)
-    # weights = checkpoint['model'] if 'model' in checkpoint else checkpoint
-    # model.load_state_dict(weights, strict=False)
-    # model.eval()
-    # finished_trajectories = []
-
-    # current_frames, next_frames = preprocess(frames[clips[:-1]], frames[clips[1:]], transforms, height,width)
-    # results_dict = model(current_frames.to(device), next_frames.to(device), attn_splits_list=[2],
-    #                      corr_radius_list=[-1], prop_radius_list=[-1], pred_bidir_flow=True)
-    # flow_pr = results_dict['flow_preds'][-1] # [2*B, 2, H, W]
-    # fwd_flows, bwd_flows = flow_pr.chunk(2) # [B, 2, H, W]
-    # predicted_flows = fwd_flows
-    #=============== GM-flow estimate forward optical flow============#
-
     #=============== raft-large estimate forward optical flow============#
     model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
     model = model.eval()
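For reference, the code path that remains estimates forward optical flow between consecutive frames with torchvision's RAFT-large, in place of the removed GMFlow block and its hard-coded local paths. A minimal, self-contained sketch of that idea, using a random placeholder clip instead of the real video frames (shapes chosen to satisfy RAFT's divisible-by-8 requirement):

```python
import torch
from torchvision.models.optical_flow import raft_large, Raft_Large_Weights

device = "cuda" if torch.cuda.is_available() else "cpu"

weights = Raft_Large_Weights.DEFAULT
model = raft_large(weights=weights, progress=False).to(device).eval()
preprocess = weights.transforms()  # normalizes both frame batches to the range RAFT expects

# placeholder clip: T frames of shape (3, H, W); the real code loads frames from the input video
frames = torch.rand(8, 3, 512, 512)

# forward flow between consecutive frames: frame t -> frame t+1
current_frames, next_frames = preprocess(frames[:-1], frames[1:])

with torch.no_grad():
    # RAFT returns a list of iteratively refined flow fields; the last entry is the final estimate
    flow_preds = model(current_frames.to(device), next_frames.to(device))

predicted_flows = flow_preds[-1]  # (T-1, 2, H, W) forward optical flow
print(predicted_flows.shape)
```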