This view is limited to 50 files because it contains too many changes.
- .gitattributes +4 -0
- checkpoints/dpvo.pth +3 -0
- checkpoints/wham_vit_bedlam_w_3dpw.pth.tar +3 -0
- checkpoints/wham_vit_w_3dpw.pth.tar +3 -0
- checkpoints/yolov8x.pt +3 -0
- configs/__pycache__/config.cpython-39.pyc +0 -0
- configs/__pycache__/constants.cpython-39.pyc +0 -0
- configs/config.py +111 -0
- configs/constants.py +59 -0
- configs/yamls/demo.yaml +14 -0
- configs/yamls/model_base.yaml +7 -0
- configs/yamls/stage1.yaml +28 -0
- configs/yamls/stage2.yaml +37 -0
- configs/yamls/stage2_b.yaml +38 -0
- dataset/body_models/J_regressor_coco.npy +3 -0
- dataset/body_models/J_regressor_feet.npy +3 -0
- dataset/body_models/J_regressor_h36m.npy +3 -0
- dataset/body_models/J_regressor_wham.npy +3 -0
- dataset/body_models/smpl/SMPL_FEMALE.pkl +3 -0
- dataset/body_models/smpl/SMPL_MALE.pkl +3 -0
- dataset/body_models/smpl/SMPL_NEUTRAL.pkl +3 -0
- dataset/body_models/smpl/__MACOSX/._smpl +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/._.DS_Store +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/.___init__.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/._models +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/._smpl_webuser +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/models/basicModel_f_lbs_10_207_0_v1.0.0.pkl +3 -0
- dataset/body_models/smpl/__MACOSX/smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl +3 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._LICENSE.txt +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._README.txt +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/.___init__.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._hello_world +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._lbs.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._posemapper.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._serialization.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._verts.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._hello_smpl.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._render_smpl.py +0 -0
- dataset/body_models/smpl_faces.npy +3 -0
- dataset/body_models/smpl_mean_params.npz +3 -0
- dataset/body_models/smplx2smpl.pkl +3 -0
- demo.py +234 -0
- docs/API.md +18 -0
- docs/DATASET.md +42 -0
- docs/DOCKER.md +23 -0
- docs/INSTALL.md +38 -0
- examples/IMG_9730.mov +3 -0
- examples/IMG_9731.mov +3 -0
- examples/IMG_9732.mov +3 -0
- examples/drone_calib.txt +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/drone_video.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/IMG_9730.mov filter=lfs diff=lfs merge=lfs -text
+examples/IMG_9731.mov filter=lfs diff=lfs merge=lfs -text
+examples/IMG_9732.mov filter=lfs diff=lfs merge=lfs -text
checkpoints/dpvo.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30d02dc2b88a321cf99aad8e4ea1152a44d791b5b65bf95ad036922819c0ff12
size 14167743
checkpoints/wham_vit_bedlam_w_3dpw.pth.tar
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:91d250d2d298b00f200aa39df36253b55ca434188c2934d8e91e5e0777fb67fd
size 527307587
checkpoints/wham_vit_w_3dpw.pth.tar
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9835bcbc952221ad72fa72e768e1f4620e96788b12cecd676a3b1dbee057dd66
size 527307587
checkpoints/yolov8x.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4d5a3f000d771762f03fc8b57ebd0aae324aeaefdd6e68492a9c4470f2d1e8b
size 136867539
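Editor's note: the four checkpoint entries above (like the body-model and example-video entries further down) are Git LFS pointer files, not the binaries themselves; the real files are fetched with `git lfs pull` after cloning. A small, hypothetical sanity check (the helper name is ours, not part of this repo) for verifying whether a checkpoint has been materialized or is still a pointer:

```python
from pathlib import Path

# Git LFS pointer files start with this header, as shown in the diff above.
LFS_HEADER = b"version https://git-lfs.github.com/spec/v1"

def is_lfs_pointer(path: str) -> bool:
    """Return True if the file still holds a Git LFS pointer instead of the real binary."""
    p = Path(path)
    # Pointer files are tiny text stubs; the real checkpoints are tens to hundreds of MB.
    if p.stat().st_size > 1024:
        return False
    return p.read_bytes().startswith(LFS_HEADER)

if __name__ == "__main__":
    for ckpt in ["checkpoints/dpvo.pth",
                 "checkpoints/wham_vit_bedlam_w_3dpw.pth.tar",
                 "checkpoints/yolov8x.pt"]:
        print(ckpt, "-> still an LFS pointer" if is_lfs_pointer(ckpt) else "-> real file")
```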
configs/__pycache__/config.cpython-39.pyc
ADDED
Binary file (3.01 kB)
configs/__pycache__/constants.cpython-39.pyc
ADDED
Binary file (2.77 kB)
configs/config.py
ADDED
@@ -0,0 +1,111 @@
import argparse
from yacs.config import CfgNode as CN

# Configuration variable
cfg = CN()

cfg.TITLE = 'default'
cfg.OUTPUT_DIR = 'results'
cfg.EXP_NAME = 'default'
cfg.DEVICE = 'cuda'
cfg.DEBUG = False
cfg.EVAL = False
cfg.RESUME = False
cfg.LOGDIR = ''
cfg.NUM_WORKERS = 5
cfg.SEED_VALUE = -1
cfg.SUMMARY_ITER = 50
cfg.MODEL_CONFIG = ''
cfg.FLIP_EVAL = False

cfg.TRAIN = CN()
cfg.TRAIN.STAGE = 'stage1'
cfg.TRAIN.DATASET_EVAL = '3dpw'
cfg.TRAIN.CHECKPOINT = ''
cfg.TRAIN.BATCH_SIZE = 64
cfg.TRAIN.START_EPOCH = 0
cfg.TRAIN.END_EPOCH = 999
cfg.TRAIN.OPTIM = 'Adam'
cfg.TRAIN.LR = 3e-4
cfg.TRAIN.LR_FINETUNE = 5e-5
cfg.TRAIN.LR_PATIENCE = 5
cfg.TRAIN.LR_DECAY_RATIO = 0.1
cfg.TRAIN.WD = 0.0
cfg.TRAIN.MOMENTUM = 0.9
cfg.TRAIN.MILESTONES = [50, 70]

cfg.DATASET = CN()
cfg.DATASET.SEQLEN = 81
cfg.DATASET.RATIO = [1.0, 0, 0, 0, 0]

cfg.MODEL = CN()
cfg.MODEL.BACKBONE = 'vit'

cfg.LOSS = CN()
cfg.LOSS.SHAPE_LOSS_WEIGHT = 0.001
cfg.LOSS.JOINT2D_LOSS_WEIGHT = 5.
cfg.LOSS.JOINT3D_LOSS_WEIGHT = 5.
cfg.LOSS.VERTS3D_LOSS_WEIGHT = 1.
cfg.LOSS.POSE_LOSS_WEIGHT = 1.
cfg.LOSS.CASCADED_LOSS_WEIGHT = 0.0
cfg.LOSS.CONTACT_LOSS_WEIGHT = 0.04
cfg.LOSS.ROOT_VEL_LOSS_WEIGHT = 0.001
cfg.LOSS.ROOT_POSE_LOSS_WEIGHT = 0.4
cfg.LOSS.SLIDING_LOSS_WEIGHT = 0.5
cfg.LOSS.CAMERA_LOSS_WEIGHT = 0.04
cfg.LOSS.LOSS_WEIGHT = 60.
cfg.LOSS.CAMERA_LOSS_SKIP_EPOCH = 5


def get_cfg_defaults():
    """Get a yacs CfgNode object with default values for my_project."""
    # Return a clone so that the defaults will not be altered
    # This is for the "local variable" use pattern
    return cfg.clone()


def get_cfg(args, test):
    """
    Define configuration.
    """
    import os

    cfg = get_cfg_defaults()
    if os.path.exists(args.cfg):
        cfg.merge_from_file(args.cfg)

    cfg.merge_from_list(args.opts)
    if test:
        cfg.merge_from_list(['EVAL', True])

    return cfg.clone()


def bool_arg(value):
    if value.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif value.lower() in ('no', 'false', 'f', 'n', '0'):
        return False


def parse_args(test=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--cfg', type=str, default='./configs/debug.yaml', help='cfg file path')
    parser.add_argument(
        "--eval-set", type=str, default='3dpw', help="Evaluation dataset")
    parser.add_argument(
        "--eval-split", type=str, default='test', help="Evaluation data split")
    parser.add_argument('--render', default=False, type=bool_arg,
                        help='Render SMPL meshes after the evaluation')
    parser.add_argument('--save-results', default=False, type=bool_arg,
                        help='Save SMPL parameters after the evaluation')
    parser.add_argument(
        "opts", default=None, nargs=argparse.REMAINDER,
        help="Modify config options using the command-line")

    args = parser.parse_args()
    print(args, end='\n\n')
    cfg_file = args.cfg
    cfg = get_cfg(args, test)

    return cfg, cfg_file, args
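Editor's note: `get_cfg_defaults()`, `merge_from_file()`, and `merge_from_list()` follow the standard yacs pattern, so a script can build the same configuration `demo.py` uses without going through `parse_args()`. A minimal sketch, assuming yacs is installed and the repo root is the working directory:

```python
from configs.config import get_cfg_defaults

# Start from the defaults defined above, then layer a YAML file and
# command-line-style overrides on top, exactly as get_cfg() does.
cfg = get_cfg_defaults()
cfg.merge_from_file('configs/yamls/demo.yaml')                   # YAML overrides
cfg.merge_from_list(['TRAIN.BATCH_SIZE', 1, 'DEVICE', 'cuda'])   # list-style overrides

print(cfg.TRAIN.CHECKPOINT)   # 'checkpoints/wham_vit_bedlam_w_3dpw.pth.tar'
print(cfg.MODEL.BACKBONE)     # 'vit'
```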
configs/constants.py
ADDED
@@ -0,0 +1,59 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import torch

IMG_FEAT_DIM = {
    'resnet': 2048,
    'vit': 1024
}

N_JOINTS = 17
root = 'dataset'

class PATHS:
    # Raw data folders
    PARSED_DATA = f'{root}/parsed_data'
    AMASS_PTH = f'{root}/AMASS'
    THREEDPW_PTH = f'{root}/3DPW'
    HUMAN36M_PTH = f'{root}/Human36M'
    RICH_PTH = f'{root}/RICH'
    EMDB_PTH = f'{root}/EMDB'

    # Processed labels
    AMASS_LABEL = f'{root}/parsed_data/amass.pth'
    THREEDPW_LABEL = f'{root}/parsed_data/3dpw_dset_backbone.pth'
    MPII3D_LABEL = f'{root}/parsed_data/mpii3d_dset_backbone.pth'
    HUMAN36M_LABEL = f'{root}/parsed_data/human36m_dset_backbone.pth'
    INSTA_LABEL = f'{root}/parsed_data/insta_dset_backbone.pth'
    BEDLAM_LABEL = f'{root}/parsed_data/bedlam_train_backbone.pth'

class KEYPOINTS:
    NUM_JOINTS = N_JOINTS
    H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9]
    H36M_TO_J14 = H36M_TO_J17[:14]
    J17_TO_H36M = [14, 3, 4, 5, 2, 1, 0, 15, 12, 16, 13, 9, 10, 11, 8, 7, 6]
    COCO_AUG_DICT = f'{root}/body_models/coco_aug_dict.pth'
    TREE = [[5, 6], 0, 0, 1, 2, -1, -1, 5, 6, 7, 8, -1, -1, 11, 12, 13, 14, 15, 15, 15, 16, 16, 16]

    # STD scale for video noise
    S_BIAS = 1e-1
    S_JITTERING = 5e-2
    S_PEAK = 3e-1
    S_PEAK_MASK = 5e-3
    S_MASK = 0.03


class BMODEL:
    MAIN_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]  # reduced_joints

    FLDR = f'{root}/body_models/smpl/'
    SMPLX2SMPL = f'{root}/body_models/smplx2smpl.pkl'
    FACES = f'{root}/body_models/smpl_faces.npy'
    MEAN_PARAMS = f'{root}/body_models/smpl_mean_params.npz'
    JOINTS_REGRESSOR_WHAM = f'{root}/body_models/J_regressor_wham.npy'
    JOINTS_REGRESSOR_H36M = f'{root}/body_models/J_regressor_h36m.npy'
    JOINTS_REGRESSOR_EXTRA = f'{root}/body_models/J_regressor_extra.npy'
    JOINTS_REGRESSOR_FEET = f'{root}/body_models/J_regressor_feet.npy'
    PARENTS = torch.tensor([
        -1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 16, 17, 18, 19, 20, 21])
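Editor's note: the `KEYPOINTS` index lists are plain permutation tables; `H36M_TO_J17` reorders the 17 joints regressed with `J_regressor_h36m` into the J17 convention, and its first 14 entries give the commonly used 14-joint (LSP-style) subset. A small illustrative sketch (the joint array here is random data, not a real regressor output):

```python
import numpy as np
from configs.constants import KEYPOINTS

# Fake (17, 3) Human3.6M-style joints; in practice these would come from
# applying J_regressor_h36m to SMPL vertices.
h36m_joints = np.random.randn(17, 3)

j17 = h36m_joints[KEYPOINTS.H36M_TO_J17]   # reorder into the J17 convention
j14 = h36m_joints[KEYPOINTS.H36M_TO_J14]   # first 14 joints only
print(j17.shape, j14.shape)                # (17, 3) (14, 3)
```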
configs/yamls/demo.yaml
ADDED
@@ -0,0 +1,14 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'demo'
OUTPUT_DIR: 'experiments/'
NUM_WORKERS: 0
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True

TRAIN:
  STAGE: 'stage2'
  CHECKPOINT: 'checkpoints/wham_vit_bedlam_w_3dpw.pth.tar'

MODEL:
  BACKBONE: 'vit'
configs/yamls/model_base.yaml
ADDED
@@ -0,0 +1,7 @@
architecture: 'RNN'
in_dim: 49
n_iters: 1
pose_dr: 0.15
d_embed: 512
n_layers: 3
layer: 'LSTM'
configs/yamls/stage1.yaml
ADDED
@@ -0,0 +1,28 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'train_stage1'
OUTPUT_DIR: 'experiments/'
NUM_WORKERS: 8
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True
SEED_VALUE: 42

TRAIN:
  LR: 5e-4
  BATCH_SIZE: 64
  END_EPOCH: 100
  STAGE: 'stage1'
  CHECKPOINT: ''
  MILESTONES: [60, 80]

LOSS:
  SHAPE_LOSS_WEIGHT: 0.004
  JOINT3D_LOSS_WEIGHT: 0.4
  JOINT2D_LOSS_WEIGHT: 0.1
  POSE_LOSS_WEIGHT: 8.0
  CASCADED_LOSS_WEIGHT: 0.0
  SLIDING_LOSS_WEIGHT: 0.5
  CAMERA_LOSS_WEIGHT: 0.04
  ROOT_VEL_LOSS_WEIGHT: 0.001
  LOSS_WEIGHT: 50.0
  CAMERA_LOSS_SKIP_EPOCH: 5
configs/yamls/stage2.yaml
ADDED
@@ -0,0 +1,37 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'train_stage2'
OUTPUT_DIR: 'experiments'
NUM_WORKERS: 8
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True
SEED_VALUE: 42

TRAIN:
  LR: 1e-4
  LR_FINETUNE: 1e-5
  STAGE: 'stage2'
  CHECKPOINT: 'checkpoints/wham_stage1.pth.tar'
  BATCH_SIZE: 64
  END_EPOCH: 40
  MILESTONES: [20, 30]
  LR_DECAY_RATIO: 0.2

MODEL:
  BACKBONE: 'vit'

LOSS:
  SHAPE_LOSS_WEIGHT: 0.0
  JOINT2D_LOSS_WEIGHT: 3.0
  JOINT3D_LOSS_WEIGHT: 6.0
  POSE_LOSS_WEIGHT: 1.0
  CASCADED_LOSS_WEIGHT: 0.05
  SLIDING_LOSS_WEIGHT: 0.5
  CAMERA_LOSS_WEIGHT: 0.01
  ROOT_VEL_LOSS_WEIGHT: 0.001
  LOSS_WEIGHT: 60.0
  CAMERA_LOSS_SKIP_EPOCH: 0

DATASET:
  SEQLEN: 81
  RATIO: [0.2, 0.2, 0.2, 0.2, 0.2]
configs/yamls/stage2_b.yaml
ADDED
@@ -0,0 +1,38 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'train_stage2_b'
OUTPUT_DIR: 'experiments'
NUM_WORKERS: 8
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True
SEED_VALUE: 42

TRAIN:
  LR: 1e-4
  LR_FINETUNE: 1e-5
  STAGE: 'stage2'
  CHECKPOINT: 'checkpoints/wham_stage1.pth.tar'
  BATCH_SIZE: 64
  END_EPOCH: 80
  MILESTONES: [40, 50, 70]
  LR_DECAY_RATIO: 0.2

MODEL:
  BACKBONE: 'vit'

LOSS:
  SHAPE_LOSS_WEIGHT: 0.0
  JOINT2D_LOSS_WEIGHT: 5.0
  JOINT3D_LOSS_WEIGHT: 5.0
  VERTS3D_LOSS_WEIGHT: 1.0
  POSE_LOSS_WEIGHT: 3.0
  CASCADED_LOSS_WEIGHT: 0.05
  SLIDING_LOSS_WEIGHT: 0.5
  CAMERA_LOSS_WEIGHT: 0.01
  ROOT_VEL_LOSS_WEIGHT: 0.001
  LOSS_WEIGHT: 60.0
  CAMERA_LOSS_SKIP_EPOCH: 0

DATASET:
  SEQLEN: 81
  RATIO: [0.2, 0.2, 0.2, 0.2, 0.0, 0.2]
dataset/body_models/J_regressor_coco.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0cd49241810715e752aa7384363b7bc09fb96b386ca99aa1c3eb2c0d15d6b8b9
size 468648
dataset/body_models/J_regressor_feet.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ef9e6d64796f2f342983a9fde6a6d9f8e3544f1239e7f86aa4f6b7aa82f4cf6
size 220608
dataset/body_models/J_regressor_h36m.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c655cd7013d7829eb9acbebf0e43f952a3fa0305a53c35880e39192bfb6444a0
size 937168
dataset/body_models/J_regressor_wham.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f938dcfd5cd88d0b19ee34e442d49f1dc370d3d8c4f5aef57a93d0cf2e267c4c
size 854488
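Editor's note: the `J_regressor_*.npy` files above are linear joint regressors; joints are obtained as a linear combination of the 6890 SMPL mesh vertices. A sketch of how such a regressor is typically applied (the shapes follow the usual SMPL conventions and have not been verified against this repo; the vertex array here is a placeholder):

```python
import numpy as np

# A joint regressor is a (num_joints, 6890) matrix.
J_regressor = np.load('dataset/body_models/J_regressor_h36m.npy')   # e.g. (17, 6890)
verts = np.zeros((6890, 3))                                          # posed SMPL vertices (placeholder)
joints = J_regressor @ verts                                         # (num_joints, 3)
print(J_regressor.shape, joints.shape)
```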
dataset/body_models/smpl/SMPL_FEMALE.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a583c1b98e4afc19042641f1bae5cd8a1f712a6724886291a7627ec07acd408d
size 39056454
dataset/body_models/smpl/SMPL_MALE.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e8c0bbbbc635dcb166ed29c303fb4bef16ea5f623e5a89263495a9e403575bd
size 39056404
dataset/body_models/smpl/SMPL_NEUTRAL.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:98e65c74ad9b998783132f00880d1025a8d64b158e040e6ef13a557e5098bc42
size 39001280
dataset/body_models/smpl/__MACOSX/._smpl
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/._.DS_Store
ADDED
Binary file (120 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/.___init__.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/._models
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/._smpl_webuser
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/models/basicModel_f_lbs_10_207_0_v1.0.0.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a583c1b98e4afc19042641f1bae5cd8a1f712a6724886291a7627ec07acd408d
size 39056454
dataset/body_models/smpl/__MACOSX/smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e8c0bbbbc635dcb166ed29c303fb4bef16ea5f623e5a89263495a9e403575bd
size 39056404
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._LICENSE.txt
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._README.txt
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/.___init__.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._hello_world
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._lbs.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._posemapper.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._serialization.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._verts.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._hello_smpl.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._render_smpl.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl_faces.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:51fc11ebadb0487d74bef220c4eea43f014609249f0121413c1fc629d859fecb
size 165392
dataset/body_models/smpl_mean_params.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6fd6dd687800da946d0a0492383f973b92ec20f166a0b829775882868c35fcdd
size 1310
dataset/body_models/smplx2smpl.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c1d912d121ad98132e4492d8e7a0f1a8cf4412811e14a7ef8cb337bb48eef99e
size 578019251
demo.py
ADDED
@@ -0,0 +1,234 @@
import os
import argparse
import os.path as osp
from glob import glob
from collections import defaultdict

import cv2
import torch
import joblib
import numpy as np
from loguru import logger
from progress.bar import Bar

from configs.config import get_cfg_defaults
from lib.data.datasets import CustomDataset
from lib.utils.imutils import avg_preds
from lib.utils.transforms import matrix_to_axis_angle
from lib.models import build_network, build_body_model
from lib.models.preproc.detector import DetectionModel
from lib.models.preproc.extractor import FeatureExtractor
from lib.models.smplify import TemporalSMPLify

try:
    from lib.models.preproc.slam import SLAMModel
    _run_global = True
except:
    logger.info('DPVO is not properly installed. Only estimate in local coordinates !')
    _run_global = False

def run(cfg,
        video,
        output_pth,
        network,
        calib=None,
        run_global=True,
        save_pkl=False,
        visualize=False):

    cap = cv2.VideoCapture(video)
    assert cap.isOpened(), f'Failed to load video file {video}'
    fps = cap.get(cv2.CAP_PROP_FPS)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width, height = cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    # Whether or not estimating motion in global coordinates
    run_global = run_global and _run_global

    # Preprocess
    with torch.no_grad():
        if not (osp.exists(osp.join(output_pth, 'tracking_results.pth')) and
                osp.exists(osp.join(output_pth, 'slam_results.pth'))):

            detector = DetectionModel(cfg.DEVICE.lower())
            extractor = FeatureExtractor(cfg.DEVICE.lower(), cfg.FLIP_EVAL)

            if run_global: slam = SLAMModel(video, output_pth, width, height, calib)
            else: slam = None

            bar = Bar('Preprocess: 2D detection and SLAM', fill='#', max=length)
            while (cap.isOpened()):
                flag, img = cap.read()
                if not flag: break

                # 2D detection and tracking
                detector.track(img, fps, length)

                # SLAM
                if slam is not None:
                    slam.track()

                bar.next()

            tracking_results = detector.process(fps)

            if slam is not None:
                slam_results = slam.process()
            else:
                slam_results = np.zeros((length, 7))
                slam_results[:, 3] = 1.0    # Unit quaternion

            # Extract image features
            # TODO: Merge this into the previous while loop with an online bbox smoothing.
            tracking_results = extractor.run(video, tracking_results)
            logger.info('Complete Data preprocessing!')

            # Save the processed data
            joblib.dump(tracking_results, osp.join(output_pth, 'tracking_results.pth'))
            joblib.dump(slam_results, osp.join(output_pth, 'slam_results.pth'))
            logger.info(f'Save processed data at {output_pth}')

        # If the processed data already exists, load the processed data
        else:
            tracking_results = joblib.load(osp.join(output_pth, 'tracking_results.pth'))
            slam_results = joblib.load(osp.join(output_pth, 'slam_results.pth'))
            logger.info(f'Already processed data exists at {output_pth} ! Load the data .')

    # Build dataset
    dataset = CustomDataset(cfg, tracking_results, slam_results, width, height, fps)

    # run WHAM
    results = defaultdict(dict)

    n_subjs = len(dataset)
    for subj in range(n_subjs):

        with torch.no_grad():
            if cfg.FLIP_EVAL:
                # Forward pass with flipped input
                flipped_batch = dataset.load_data(subj, True)
                _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = flipped_batch
                flipped_pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)

                # Forward pass with normal input
                batch = dataset.load_data(subj)
                _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = batch
                pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)

                # Merge two predictions
                flipped_pose, flipped_shape = flipped_pred['pose'].squeeze(0), flipped_pred['betas'].squeeze(0)
                pose, shape = pred['pose'].squeeze(0), pred['betas'].squeeze(0)
                flipped_pose, pose = flipped_pose.reshape(-1, 24, 6), pose.reshape(-1, 24, 6)
                avg_pose, avg_shape = avg_preds(pose, shape, flipped_pose, flipped_shape)
                avg_pose = avg_pose.reshape(-1, 144)
                avg_contact = (flipped_pred['contact'][..., [2, 3, 0, 1]] + pred['contact']) / 2

                # Refine trajectory with merged prediction
                network.pred_pose = avg_pose.view_as(network.pred_pose)
                network.pred_shape = avg_shape.view_as(network.pred_shape)
                network.pred_contact = avg_contact.view_as(network.pred_contact)
                output = network.forward_smpl(**kwargs)
                pred = network.refine_trajectory(output, cam_angvel, return_y_up=True)

            else:
                # data
                batch = dataset.load_data(subj)
                _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = batch

                # inference
                pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)

        # if False:
        if args.run_smplify:
            smplify = TemporalSMPLify(smpl, img_w=width, img_h=height, device=cfg.DEVICE)
            input_keypoints = dataset.tracking_results[_id]['keypoints']
            pred = smplify.fit(pred, input_keypoints, **kwargs)

            with torch.no_grad():
                network.pred_pose = pred['pose']
                network.pred_shape = pred['betas']
                network.pred_cam = pred['cam']
                output = network.forward_smpl(**kwargs)
                pred = network.refine_trajectory(output, cam_angvel, return_y_up=True)

        # ========= Store results ========= #
        pred_body_pose = matrix_to_axis_angle(pred['poses_body']).cpu().numpy().reshape(-1, 69)
        pred_root = matrix_to_axis_angle(pred['poses_root_cam']).cpu().numpy().reshape(-1, 3)
        pred_root_world = matrix_to_axis_angle(pred['poses_root_world']).cpu().numpy().reshape(-1, 3)
        pred_pose = np.concatenate((pred_root, pred_body_pose), axis=-1)
        pred_pose_world = np.concatenate((pred_root_world, pred_body_pose), axis=-1)
        pred_trans = (pred['trans_cam'] - network.output.offset).cpu().numpy()

        results[_id]['pose'] = pred_pose
        results[_id]['trans'] = pred_trans
        results[_id]['pose_world'] = pred_pose_world
        results[_id]['trans_world'] = pred['trans_world'].cpu().squeeze(0).numpy()
        results[_id]['betas'] = pred['betas'].cpu().squeeze(0).numpy()
        results[_id]['verts'] = (pred['verts_cam'] + pred['trans_cam'].unsqueeze(1)).cpu().numpy()
        results[_id]['frame_ids'] = frame_id

    if save_pkl:
        joblib.dump(results, osp.join(output_pth, "wham_output.pkl"))

    # Visualize
    if visualize:
        from lib.vis.run_vis import run_vis_on_demo
        with torch.no_grad():
            run_vis_on_demo(cfg, video, results, output_pth, network.smpl, vis_global=run_global)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('--video', type=str,
                        default='examples/demo_video.mp4',
                        help='input video path or youtube link')

    parser.add_argument('--output_pth', type=str, default='output/demo',
                        help='output folder to write results')

    parser.add_argument('--calib', type=str, default=None,
                        help='Camera calibration file path')

    parser.add_argument('--estimate_local_only', action='store_true',
                        help='Only estimate motion in camera coordinate if True')

    parser.add_argument('--visualize', action='store_true',
                        help='Visualize the output mesh if True')

    parser.add_argument('--save_pkl', action='store_true',
                        help='Save output as pkl file')

    parser.add_argument('--run_smplify', action='store_true',
                        help='Run Temporal SMPLify for post processing')

    args = parser.parse_args()

    cfg = get_cfg_defaults()
    cfg.merge_from_file('configs/yamls/demo.yaml')

    logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
    logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')

    # ========= Load WHAM ========= #
    smpl_batch_size = cfg.TRAIN.BATCH_SIZE * cfg.DATASET.SEQLEN
    smpl = build_body_model(cfg.DEVICE, smpl_batch_size)
    network = build_network(cfg, smpl)
    network.eval()

    # Output folder
    sequence = '.'.join(args.video.split('/')[-1].split('.')[:-1])
    output_pth = osp.join(args.output_pth, sequence)
    os.makedirs(output_pth, exist_ok=True)

    run(cfg,
        args.video,
        output_pth,
        network,
        args.calib,
        run_global=not args.estimate_local_only,
        save_pkl=args.save_pkl,
        visualize=args.visualize)

    print()
    logger.info('Done !')
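Editor's note: when demo.py is run with `--save_pkl`, the per-subject dictionary built above is written with joblib to `<output_pth>/wham_output.pkl`. A minimal sketch for loading it back and inspecting one subject (the path below assumes the default `--output_pth` and an input video named `IMG_9732.mov`):

```python
import joblib

# Path layout follows demo.py: output/demo/<video name without extension>/wham_output.pkl
results = joblib.load('output/demo/IMG_9732/wham_output.pkl')

for subj_id, subj in results.items():
    print(subj_id,
          subj['pose'].shape,         # (T, 72) axis-angle SMPL pose, camera coordinates
          subj['pose_world'].shape,   # (T, 72) axis-angle SMPL pose, world coordinates
          subj['trans'].shape,        # (T, 3) camera-frame translation
          subj['betas'].shape,        # SMPL shape parameters
          subj['verts'].shape)        # (T, 6890, 3) posed vertices, camera coordinates
```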
docs/API.md
ADDED
@@ -0,0 +1,18 @@
## Python API

To use the Python API of WHAM, please finish the basic installation first ([Installation](INSTALL.md) or [Docker](DOCKER.md)).

If you use the Docker environment, please run:

```bash
cd /path/to/WHAM
docker run -it -v .:/code/ --rm yusun9/wham-vitpose-dpvo-cuda11.3-python3.9 python
```

Then you can run WHAM from Python like this:
```python
from wham_api import WHAM_API
wham_model = WHAM_API()
input_video_path = 'examples/IMG_9732.mov'
results, tracking_results, slam_results = wham_model(input_video_path)
```
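Editor's note: the `results` object returned above appears to mirror the per-subject dictionary that demo.py builds (keys such as 'pose', 'trans', 'pose_world', 'trans_world', 'betas', 'verts', and 'frame_ids'), assuming the API wraps the same pipeline. A hypothetical follow-up to the snippet above:

```python
# Assumes `results` from the API call has the same structure as demo.py's output dict.
for subj_id, subj in results.items():
    print(f"subject {subj_id}: {len(subj['frame_ids'])} frames, pose {subj['pose'].shape}")
```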
docs/DATASET.md
ADDED
@@ -0,0 +1,42 @@
# Dataset

## Training Data
We use the [AMASS](https://amass.is.tue.mpg.de/), [InstaVariety](https://github.com/akanazawa/human_dynamics/blob/master/doc/insta_variety.md), [MPI-INF-3DHP](https://vcai.mpi-inf.mpg.de/3dhp-dataset/), [Human3.6M](http://vision.imar.ro/human3.6m/description.php), and [3DPW](https://virtualhumans.mpi-inf.mpg.de/3DPW/) datasets for training. Please register on their websites to download and process the data. You can download the parsed ViT versions of the InstaVariety, MPI-INF-3DHP, Human3.6M, and 3DPW data from [Google Drive](https://drive.google.com/drive/folders/13T2ghVvrw_fEk3X-8L0e6DVSYx_Og8o3?usp=sharing) and save them under the `dataset/parsed_data` folder.

### Process AMASS dataset
After downloading the AMASS dataset, you can process it by running:
```bash
python -m lib.data_utils.amass_utils
```
The processed data will be stored at `dataset/parsed_data/amass.pth`.

### Process 3DPW, MPII3D, Human3.6M, and InstaVariety datasets
First, visit [TCMR](https://github.com/hongsukchoi/TCMR_RELEASE) and download the preprocessed data to `dataset/parsed_data/TCMR_preproc/`.

Next, prepare 2D keypoint detections using [ViTPose](https://github.com/ViTAE-Transformer/ViTPose) and store the results at `dataset/detection_results/<DATASET-NAME>/<SEQUENCE_NAME>.npy`. You may need to download all images to prepare the detection results.

For the Human3.6M, MPII3D, and InstaVariety datasets, you also need to download the [NeuralAnnot](https://github.com/mks0601/NeuralAnnot_RELEASE) pseudo ground-truth SMPL labels. As mentioned in our paper, we do not supervise WHAM on these labels, but use them for the neural initialization step.

Finally, run the following commands to preprocess all training data.
```bash
python -m lib.data_utils.threedpw_train_utils   # 3DPW dataset
# [Coming] python -m lib.data_utils.human36m_train_utils   # Human3.6M dataset
# [Coming] python -m lib.data_utils.mpii3d_train_utils     # MPI-INF-3DHP dataset
# [Coming] python -m lib.data_utils.insta_train_utils      # InstaVariety dataset
```

### Process BEDLAM dataset
Will be updated.

## Evaluation Data
We use [3DPW](https://virtualhumans.mpi-inf.mpg.de/3DPW/), [RICH](https://rich.is.tue.mpg.de/), and [EMDB](https://eth-ait.github.io/emdb/) for evaluation. We provide the parsed data for evaluation. Please download the data from [Google Drive](https://drive.google.com/drive/folders/13T2ghVvrw_fEk3X-8L0e6DVSYx_Og8o3?usp=sharing) and place it at `dataset/parsed_data/`.

To process the data at your end, please
1) Download the parsed 3DPW data from [TCMR](https://github.com/hongsukchoi/TCMR_RELEASE) and store it at `dataset/parsed_data/TCMR_preproc/`.
2) Run [ViTPose](https://github.com/ViTAE-Transformer/ViTPose) on all test data and store the results at `dataset/detection_results/<DATASET-NAME>`.
3) Run the following commands.
```bash
python -m lib.data_utils.threedpw_eval_utils --split <"val" or "test">   # 3DPW dataset
python -m lib.data_utils.emdb_eval_utils --split <"1" or "2">            # EMDB dataset
python -m lib.data_utils.rich_eval_utils                                 # RICH dataset
```
docs/DOCKER.md
ADDED
@@ -0,0 +1,23 @@
## Installation

### Prerequisites
1. Please make sure you have properly installed [Docker](https://www.docker.com/) and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) before installation.

2. Please prepare the essential data for inference:
To download the SMPL body models (Neutral, Female, and Male), you need to register for [SMPL](https://smpl.is.tue.mpg.de/) and [SMPLify](https://smplify.is.tue.mpg.de/). The username and password for both homepages will be used while fetching the demo data.
Next, run the following script to fetch the demo data. This script will download all the required dependencies, including trained models and demo videos.
```bash
bash fetch_demo_data.sh
```

### Usage
1. Pull the docker image from Docker Hub:
```bash
docker pull yusun9/wham-vitpose-dpvo-cuda11.3-python3.9:latest
```

2. Run the code in the docker environment:
```bash
cd /path/to/WHAM
docker run -v .:/code/ --rm yusun9/wham-vitpose-dpvo-cuda11.3-python3.9 python demo.py --video examples/IMG_9732.mov
```
docs/INSTALL.md
ADDED
@@ -0,0 +1,38 @@
# Installation

WHAM has been implemented and tested on Ubuntu 20.04 and 22.04 with Python 3.9. We provide an [anaconda](https://www.anaconda.com/) environment to run WHAM as below.

```bash
# Clone the repo
git clone https://github.com/yohanshin/WHAM.git --recursive
cd WHAM/

# Create Conda environment
conda create -n wham python=3.9
conda activate wham

# Install PyTorch libraries
conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch

# Install PyTorch3D (optional) for visualization
conda install -c fvcore -c iopath -c conda-forge fvcore iopath
pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py39_cu113_pyt1110/download.html

# Install WHAM dependencies
pip install -r requirements.txt

# Install ViTPose
pip install -v -e third-party/ViTPose

# Install DPVO
cd third-party/DPVO
wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
unzip eigen-3.4.0.zip -d thirdparty && rm -rf eigen-3.4.0.zip
conda install pytorch-scatter=2.0.9 -c rusty1s
conda install cudatoolkit-dev=11.3.1 -c conda-forge

# ONLY IF your GCC version is greater than 10
conda install -c conda-forge gxx=9.5

pip install .
```
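Editor's note: a quick, optional sanity check after installation, written as a minimal sketch; it only confirms that the core dependencies import and that CUDA is visible, not that WHAM itself runs end to end.

```python
import torch

print('torch', torch.__version__)              # expected 1.11.0 for this setup
print('CUDA available:', torch.cuda.is_available())

# demo.py also imports these packages, so they should resolve after `pip install -r requirements.txt`.
import cv2
import joblib
import numpy as np
from yacs.config import CfgNode

print('OpenCV', cv2.__version__)
print('NumPy', np.__version__)
```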
examples/IMG_9730.mov
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3739b87ba0c64d047df3d8f5479c530377788fdab4c2283925477894a1d252f9
size 21526220
examples/IMG_9731.mov
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:116ad3f95743524283a234fd9e7a1152b28a04536ab5975f4e4e71c547d9e1a6
size 22633328
examples/IMG_9732.mov
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:168773c92e0112361dcd1da4154c915983490e58ff89102c1a65edb28d505813
size 23960355
examples/drone_calib.txt
ADDED
@@ -0,0 +1 @@
1321.0 1321.0 960.0 540.0