Commit 00b18c3 · Parent: c7c6869

clean code

Files changed:
- README.md +7 -6
- annotator/dwpose/wholebody.py +1 -97
- video_diffusion/common/image_util.py +0 -25
README.md
CHANGED
@@ -2,9 +2,10 @@
 ## [<a href="https://knightyxp.github.io/VideoGrain_project_page/" target="_blank">Project Page</a>]
 
 [](https://arxiv.org/abs/2502.17258)
-[](https://knightyxp.github.io/VideoGrain_project_page/)
-
+[](https://huggingface.co/papers/2502.17258)
 [](https://knightyxp.github.io/VideoGrain_project_page/)
+
+[](https://youtu.be/JKDLet618hU)
 
 <table class="center" border="1" cellspacing="0" cellpadding="5">
 <tr>
@@ -70,7 +71,7 @@ bash download_all.sh
 
 <details><summary>Click for ControlNet annotator weights (if you can not access to huggingface)</summary>
 
-You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose, cost around 4G
+You can download all the annotator checkpoints (such as DW-Pose, depth_zoe, depth_midas, and OpenPose; around 4G in total) from [baidu](https://pan.baidu.com/s/1sgBFLFkdTCDTn4oqHjGb9A?pwd=pdm5) or [google](https://drive.google.com/file/d/1qOsmWshnFMMr8x1HteaTViTSQLh_4rle/view?usp=drive_link)
 Then extract them into ./annotator/ckpts
 
 </details>
@@ -97,10 +98,10 @@ or accelerate launch test.py --config config/part_level/adding_new_object/run_tw
 ```
 result
 ├── run_two_man
-│   ├── control
+│   ├── control # control condition
 │   ├── infer_samples
-│   ├── input
-│   ├── masked_video.mp4
+│   ├── input # the input video frames
+│   ├── masked_video.mp4 # check whether edit regions are accurately covered
 │   ├── sample
 │       ├── step_0 # result image folder
 │       ├── step_0.mp4 # result video
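The hunk above adds mirror links for the annotator weights, which are then extracted into `./annotator/ckpts`. As a quick sanity check after extraction, a minimal sketch along these lines could confirm the layout; the helper name is hypothetical, and the only filename taken from this commit is `dw-ll_ucoco_384.pth`, the DW-Pose weight referenced in `annotator/dwpose/wholebody.py`:

```python
from pathlib import Path

def check_annotator_ckpts(ckpt_dir: str = "annotator/ckpts") -> None:
    """Hypothetical helper: list extracted annotator checkpoints and warn if DW-Pose weights are missing."""
    root = Path(ckpt_dir)
    if not root.is_dir():
        raise FileNotFoundError(f"{root} not found; extract the downloaded archive here")
    files = sorted(p.name for p in root.rglob("*") if p.is_file())
    print(f"found {len(files)} checkpoint file(s) under {root}")
    # dw-ll_ucoco_384.pth is the DW-Pose checkpoint path referenced by annotator/dwpose/wholebody.py
    if "dw-ll_ucoco_384.pth" not in files:
        print("warning: dw-ll_ucoco_384.pth is missing; the DW-Pose annotator will not load")

if __name__ == "__main__":
    check_annotator_ckpts()
```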
annotator/dwpose/wholebody.py
CHANGED
@@ -60,100 +60,4 @@ class Wholebody:
         keypoints, scores = keypoints_info[
             ..., :2], keypoints_info[..., 2]
 
-        return keypoints, scores
-
-
-
-
-# # Copyright (c) OpenMMLab. All rights reserved.
-# import numpy as np
-# from . import util
-# import cv2
-# import mmcv
-# import torch
-# import matplotlib.pyplot as plt
-# from mmpose.apis import inference_topdown
-# from mmpose.apis import init_model as init_pose_estimator
-# from mmpose.evaluation.functional import nms
-# from mmpose.utils import adapt_mmdet_pipeline
-# from mmpose.structures import merge_data_samples
-
-# from mmdet.apis import inference_detector, init_detector
-
-
-# class Wholebody:
-#     def __init__(self):
-#         device = 'cuda:0'
-#         det_config = 'annotator/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py'
-#         det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
-#         pose_config = 'annotator/dwpose/dwpose_config/dwpose-l_384x288.py'
-#         pose_ckpt = 'annotator/ckpts/dw-ll_ucoco_384.pth'
-
-#         # build detector
-#         self.detector = init_detector(det_config, det_ckpt, device=device)
-#         self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)
-
-#         # build pose estimator
-#         self.pose_estimator = init_pose_estimator(
-#             pose_config,
-#             pose_ckpt,
-#             device=device)
-
-#     def __call__(self, oriImg):
-#         # predict bbox
-#         det_result = inference_detector(self.detector, oriImg)
-#         pred_instance = det_result.pred_instances.cpu().numpy()
-#         bboxes = np.concatenate(
-#             (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
-#         bboxes = bboxes[np.logical_and(pred_instance.labels == 0,
-#                                        pred_instance.scores > 0.3)]
-#         # # max value
-#         # if len(bboxes) > 0:
-#         #     bboxes = bboxes[0].reshape(1,-1)
-#         bboxes = bboxes[nms(bboxes, 0.3), :4]
-
-#         # predict keypoints
-#         if len(bboxes) == 0:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg)
-#         else:
-#             pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
-#         preds = merge_data_samples(pose_results)
-#         preds = preds.pred_instances
-
-#         # preds = pose_results[0].pred_instances
-#         keypoints = preds.get('transformed_keypoints',
-#                               preds.keypoints)
-#         if 'keypoint_scores' in preds:
-#             scores = preds.keypoint_scores
-#         else:
-#             scores = np.ones(keypoints.shape[:-1])
-
-#         if 'keypoints_visible' in preds:
-#             visible = preds.keypoints_visible
-#         else:
-#             visible = np.ones(keypoints.shape[:-1])
-#         keypoints_info = np.concatenate(
-#             (keypoints, scores[..., None], visible[..., None]),
-#             axis=-1)
-#         # compute neck joint
-#         neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
-#         # neck score when visualizing pred
-#         neck[:, 2:4] = np.logical_and(
-#             keypoints_info[:, 5, 2:4] > 0.3,
-#             keypoints_info[:, 6, 2:4] > 0.3).astype(int)
-#         new_keypoints_info = np.insert(
-#             keypoints_info, 17, neck, axis=1)
-#         mmpose_idx = [
-#             17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
-#         ]
-#         openpose_idx = [
-#             1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
-#         ]
-#         new_keypoints_info[:, openpose_idx] = \
-#             new_keypoints_info[:, mmpose_idx]
-#         keypoints_info = new_keypoints_info
-
-#         keypoints, scores, visible = keypoints_info[
-#             ..., :2], keypoints_info[..., 2], keypoints_info[..., 3]
-
-#         return keypoints, scores
+        return keypoints, scores
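The commit removes the commented-out mmpose/mmdet reference implementation and keeps only the active return path, so the class still hands back per-person keypoint coordinates plus confidence scores. A minimal usage sketch of that contract, assuming the live `Wholebody` class still takes no constructor arguments and accepts a frame as a NumPy array (as the deleted reference code did), with the shapes inferred from `keypoints_info[..., :2]` / `[..., 2]`:

```python
import numpy as np
from annotator.dwpose.wholebody import Wholebody  # module path as in this commit

estimator = Wholebody()  # assumed no-arg constructor, mirroring the deleted reference code
frame = np.zeros((512, 512, 3), dtype=np.uint8)  # placeholder frame

# keypoints: (num_people, num_keypoints, 2) x/y coordinates
# scores:    (num_people, num_keypoints) per-keypoint confidences
keypoints, scores = estimator(frame)

# keep only confidently detected joints, reusing the 0.3 threshold from the reference code
confident = scores > 0.3
print("people:", keypoints.shape[0], "confident joints:", int(confident.sum()))
```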
video_diffusion/common/image_util.py
CHANGED
@@ -19,8 +19,6 @@ import torchvision.transforms.functional as F
 import random
 from scipy.ndimage import binary_dilation
 import sys
-sys.path.append('/home/xianyang/Data/code/FateZero/video_diffusion/gmflow')
-from gmflow.gmflow import GMFlow
 
 IMAGE_EXTENSION = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp", ".JPEG")
 
@@ -577,29 +575,6 @@ def sample_trajectories_new(video_path, device,height,width):
 
     clips = list(range(len(frames)))
 
-    #=============== GM-flow estimate forward optical flow============#
-    # model = GMFlow(feature_channels=128,
-    #                num_scales=1,
-    #                upsample_factor=8,
-    #                num_head=1,
-    #                attention_type='swin',
-    #                ffn_dim_expansion=4,
-    #                num_transformer_layers=6,
-    #                ).to(device)
-    # checkpoint = torch.load('/home/xianyang/Data/code/FRESCO/model/gmflow_sintel-0c07dcb3.pth', map_location=lambda storage, loc: storage)
-    # weights = checkpoint['model'] if 'model' in checkpoint else checkpoint
-    # model.load_state_dict(weights, strict=False)
-    # model.eval()
-    # finished_trajectories = []
-
-    # current_frames, next_frames = preprocess(frames[clips[:-1]], frames[clips[1:]], transforms, height,width)
-    # results_dict = model(current_frames.to(device), next_frames.to(device), attn_splits_list=[2],
-    #                      corr_radius_list=[-1], prop_radius_list=[-1], pred_bidir_flow=True)
-    # flow_pr = results_dict['flow_preds'][-1] # [2*B, 2, H, W]
-    # fwd_flows, bwd_flows = flow_pr.chunk(2) # [B, 2, H, W]
-    # predicted_flows = fwd_flows
-    #=============== GM-flow estimate forward optical flow============#
-
     #=============== raft-large estimate forward optical flow============#
     model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
     model = model.eval()
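For reference, the code path that remains estimates forward optical flow between consecutive frames with torchvision's RAFT-large, in place of the removed GMFlow block and its hard-coded local paths. A minimal, self-contained sketch of that idea, using a random placeholder clip instead of the real video frames (shapes chosen to satisfy RAFT's divisible-by-8 requirement):

```python
import torch
from torchvision.models.optical_flow import raft_large, Raft_Large_Weights

device = "cuda" if torch.cuda.is_available() else "cpu"

weights = Raft_Large_Weights.DEFAULT
model = raft_large(weights=weights, progress=False).to(device).eval()
preprocess = weights.transforms()  # normalizes both frame batches to the range RAFT expects

# placeholder clip: T frames of shape (3, H, W); the real code loads frames from the input video
frames = torch.rand(8, 3, 512, 512)

# forward flow between consecutive frames: frame t -> frame t+1
current_frames, next_frames = preprocess(frames[:-1], frames[1:])

with torch.no_grad():
    # RAFT returns a list of iteratively refined flow fields; the last entry is the final estimate
    flow_preds = model(current_frames.to(device), next_frames.to(device))

predicted_flows = flow_preds[-1]  # (T-1, 2, H, W) forward optical flow
print(predicted_flows.shape)
```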