[WIP] Uploading WHAM folder using upload_folder function (multi-commit 32885d44e168e469262014116685345205a946d497a505f80c72efc2bff4741f)

#6
by Techt3o - opened
This view is limited to 50 files because it contains too many changes.  See the raw diff here.
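For context, a PR title of this form is what `huggingface_hub` auto-generates when a folder is pushed with the experimental multi-commit option of `upload_folder`. A minimal sketch of how such an upload can be produced (repo id and folder path are placeholders, and the `multi_commits` flag assumes a `huggingface_hub` release that still ships it):

```python
# Illustrative sketch only: push a local folder as a multi-commit PR.
# The repo id below is a placeholder, not the actual target of this PR.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="WHAM",                 # local folder to upload
    repo_id="<user>/<space-name>",      # placeholder target repo
    repo_type="space",
    multi_commits=True,                 # split the upload across several commits on a PR
    multi_commits_verbose=True,
)
```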
Files changed (50)
  1. .gitattributes +17 -0
  2. LICENSE +21 -0
  3. README.md +120 -11
  4. checkpoints/dpvo.pth +3 -0
  5. checkpoints/hmr2a.ckpt +3 -0
  6. checkpoints/vitpose-h-multi-coco.pth +3 -0
  7. checkpoints/wham_vit_bedlam_w_3dpw.pth.tar +3 -0
  8. checkpoints/wham_vit_w_3dpw.pth.tar +3 -0
  9. checkpoints/yolov8x.pt +3 -0
  10. configs/__pycache__/config.cpython-39.pyc +0 -0
  11. configs/__pycache__/constants.cpython-39.pyc +0 -0
  12. configs/config.py +111 -0
  13. configs/constants.py +59 -0
  14. configs/yamls/demo.yaml +14 -0
  15. configs/yamls/model_base.yaml +7 -0
  16. configs/yamls/stage1.yaml +28 -0
  17. configs/yamls/stage2.yaml +37 -0
  18. configs/yamls/stage2_b.yaml +38 -0
  19. dataset/body_models/J_regressor_coco.npy +3 -0
  20. dataset/body_models/J_regressor_feet.npy +3 -0
  21. dataset/body_models/J_regressor_h36m.npy +3 -0
  22. dataset/body_models/J_regressor_wham.npy +3 -0
  23. dataset/body_models/smpl/SMPL_FEMALE.pkl +3 -0
  24. dataset/body_models/smpl/SMPL_MALE.pkl +3 -0
  25. dataset/body_models/smpl/SMPL_NEUTRAL.pkl +3 -0
  26. dataset/body_models/smpl/__MACOSX/._smpl +0 -0
  27. dataset/body_models/smpl/__MACOSX/smpl/._.DS_Store +0 -0
  28. dataset/body_models/smpl/__MACOSX/smpl/.___init__.py +0 -0
  29. dataset/body_models/smpl/__MACOSX/smpl/._models +0 -0
  30. dataset/body_models/smpl/__MACOSX/smpl/._smpl_webuser +0 -0
  31. dataset/body_models/smpl/__MACOSX/smpl/models/basicModel_f_lbs_10_207_0_v1.0.0.pkl +3 -0
  32. dataset/body_models/smpl/__MACOSX/smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl +3 -0
  33. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._LICENSE.txt +0 -0
  34. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._README.txt +0 -0
  35. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/.___init__.py +0 -0
  36. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._hello_world +0 -0
  37. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._lbs.py +0 -0
  38. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._posemapper.py +0 -0
  39. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._serialization.py +0 -0
  40. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._verts.py +0 -0
  41. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._hello_smpl.py +0 -0
  42. dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._render_smpl.py +0 -0
  43. dataset/body_models/smpl_faces.npy +3 -0
  44. dataset/body_models/smpl_mean_params.npz +3 -0
  45. dataset/body_models/smplx2smpl.pkl +3 -0
  46. demo.py +234 -0
  47. docs/API.md +18 -0
  48. docs/DATASET.md +42 -0
  49. docs/DOCKER.md +23 -0
  50. docs/INSTALL.md +38 -0
.gitattributes CHANGED
@@ -33,3 +33,20 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ examples/drone_video.mp4 filter=lfs diff=lfs merge=lfs -text
+ examples/IMG_9730.mov filter=lfs diff=lfs merge=lfs -text
+ examples/IMG_9731.mov filter=lfs diff=lfs merge=lfs -text
+ examples/IMG_9732.mov filter=lfs diff=lfs merge=lfs -text
+ examples/test16.mov filter=lfs diff=lfs merge=lfs -text
+ examples/test17.mov filter=lfs diff=lfs merge=lfs -text
+ examples/test18.mov filter=lfs diff=lfs merge=lfs -text
+ examples/test19.mov filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/lib.win-amd64-3.9/lietorch_backends.cp39-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/temp.win-amd64-3.9/Release/dpvo/altcorr/correlation.obj filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/temp.win-amd64-3.9/Release/dpvo/altcorr/correlation_kernel.obj filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/temp.win-amd64-3.9/Release/dpvo/fastba/ba.obj filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/temp.win-amd64-3.9/Release/dpvo/fastba/ba_cuda.obj filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/temp.win-amd64-3.9/Release/dpvo/lietorch/src/lietorch.obj filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/temp.win-amd64-3.9/Release/dpvo/lietorch/src/lietorch_cpu.obj filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/build/temp.win-amd64-3.9/Release/dpvo/lietorch/src/lietorch_gpu.obj filter=lfs diff=lfs merge=lfs -text
+ third-party/DPVO/dist/dpvo-0.0.0-py3.9-win-amd64.egg filter=lfs diff=lfs merge=lfs -text
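The new rules above keep the demo videos and the Windows DPVO build artifacts in Git LFS. As an illustrative aside (not part of this PR), equivalent rules can be appended to `.gitattributes` with `git lfs track`; the patterns below are examples only:

```bash
# git lfs track appends a matching filter rule to .gitattributes
git lfs track "examples/*.mov"
git lfs track "third-party/DPVO/build/**/*.obj"
git add .gitattributes
```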
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 Soyong Shin
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,11 +1,120 @@
- ---
- title: Motionbert Meta Sapiens
- emoji: 🌍
- colorFrom: green
- colorTo: indigo
- sdk: docker
- pinned: false
- short_description: Sapiens
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion
+
+ <a href="https://pytorch.org/get-started/locally/"><img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-ee4c2c?logo=pytorch&logoColor=white"></a> [![report](https://img.shields.io/badge/arxiv-report-red)](https://arxiv.org/abs/2312.07531) <a href="https://wham.is.tue.mpg.de/"><img alt="Project" src="https://img.shields.io/badge/-Project%20Page-lightgrey?logo=Google%20Chrome&color=informational&logoColor=white"></a> [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1ysUtGSwidTQIdBQRhq0hj63KbseFujkn?usp=sharing)
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/wham-reconstructing-world-grounded-humans/3d-human-pose-estimation-on-3dpw)](https://paperswithcode.com/sota/3d-human-pose-estimation-on-3dpw?p=wham-reconstructing-world-grounded-humans) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/wham-reconstructing-world-grounded-humans/3d-human-pose-estimation-on-emdb)](https://paperswithcode.com/sota/3d-human-pose-estimation-on-emdb?p=wham-reconstructing-world-grounded-humans)
+
+ https://github.com/yohanshin/WHAM/assets/46889727/da4602b4-0597-4e64-8da4-ab06931b23ee
+
+ ## Introduction
+ This repository is the official [PyTorch](https://pytorch.org/) implementation of [WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion](https://arxiv.org/abs/2312.07531). For more information, please visit our [project page](https://wham.is.tue.mpg.de/).
+
+ ## Installation
+ Please see [Installation](docs/INSTALL.md) for details.
+
+ ## Quick Demo
+
+ ### [<img src="https://i.imgur.com/QCojoJk.png" width="30"> Google Colab for WHAM demo is now available](https://colab.research.google.com/drive/1ysUtGSwidTQIdBQRhq0hj63KbseFujkn?usp=sharing)
+
+ ### Registration
+
+ To download the SMPL body models (Neutral, Female, and Male), you need to register for [SMPL](https://smpl.is.tue.mpg.de/) and [SMPLify](https://smplify.is.tue.mpg.de/). The username and password for both homepages will be used while fetching the demo data.
+
+ Next, run the following script to fetch the demo data. This script will download all the required dependencies, including trained models and demo videos.
+
+ ```bash
+ bash fetch_demo_data.sh
+ ```
+
+ You can try it with one example video:
+ ```
+ python demo.py --video examples/IMG_9732.mov --visualize
+ ```
+
+ We assume the camera focal length following [CLIFF](https://github.com/haofanwang/CLIFF). You can specify known camera intrinsics [fx fy cx cy] for SLAM as in the example below:
+ ```
+ python demo.py --video examples/drone_video.mp4 --calib examples/drone_calib.txt --visualize
+ ```
+
+ You can skip SLAM if you only want the motion in camera coordinates:
+ ```
+ python demo.py --video examples/IMG_9732.mov --visualize --estimate_local_only
+ ```
+
+ You can further refine the results of WHAM using Temporal SMPLify as a post-processing step. This improves 2D alignment as well as 3D accuracy. All you need to do is add the `--run_smplify` flag when running the demo.
+
+ ## Docker
+
+ Please refer to [Docker](docs/DOCKER.md) for details.
+
+ ## Python API
+
+ Please refer to [API](docs/API.md) for details.
+
+ ## Dataset
+ Please see [Dataset](docs/DATASET.md) for details.
+
+ ## Evaluation
+ ```bash
+ # Evaluate on 3DPW dataset
+ python -m lib.eval.evaluate_3dpw --cfg configs/yamls/demo.yaml TRAIN.CHECKPOINT checkpoints/wham_vit_w_3dpw.pth.tar
+
+ # Evaluate on RICH dataset
+ python -m lib.eval.evaluate_rich --cfg configs/yamls/demo.yaml TRAIN.CHECKPOINT checkpoints/wham_vit_w_3dpw.pth.tar
+
+ # Evaluate on EMDB dataset (also computes W-MPJPE and WA-MPJPE)
+ python -m lib.eval.evaluate_emdb --cfg configs/yamls/demo.yaml --eval-split 1 TRAIN.CHECKPOINT checkpoints/wham_vit_w_3dpw.pth.tar # EMDB 1
+
+ python -m lib.eval.evaluate_emdb --cfg configs/yamls/demo.yaml --eval-split 2 TRAIN.CHECKPOINT checkpoints/wham_vit_w_3dpw.pth.tar # EMDB 2
+ ```
+
+ ## Training
+ WHAM training involves two stages: (1) 2D-to-SMPL lifting on the AMASS dataset and (2) fine-tuning with feature integration on the video datasets. Please see [Dataset](docs/DATASET.md) for preprocessing the training datasets.
+
+ ### Stage 1.
+ ```bash
+ python train.py --cfg configs/yamls/stage1.yaml
+ ```
+
+ ### Stage 2.
+ Training stage 2 requires the pretrained results from stage 1. You can use your own pretrained results, or download the weights from [Google Drive](https://drive.google.com/file/d/1Erjkho7O0bnZFawarntICRUCroaKabRE/view?usp=sharing) and save them as `checkpoints/wham_stage1.pth.tar`.
+ ```bash
+ python train.py --cfg configs/yamls/stage2.yaml TRAIN.CHECKPOINT <PATH-TO-STAGE1-RESULTS>
+ ```
+
+ ### Train with BEDLAM
+ TBD
+
+ ## Acknowledgement
+ We sincerely thank Hongwei Yi and Silvia Zuffi for the discussion and proofreading. Part of this work was done while Soyong Shin was an intern at the Max Planck Institute for Intelligent Systems.
+
+ The base implementation is largely borrowed from [VIBE](https://github.com/mkocabas/VIBE) and [TCMR](https://github.com/hongsukchoi/TCMR_RELEASE). We use [ViTPose](https://github.com/ViTAE-Transformer/ViTPose) for 2D keypoint detection and [DPVO](https://github.com/princeton-vl/DPVO) / [DROID-SLAM](https://github.com/princeton-vl/DROID-SLAM) for extracting camera motion. Please visit their official websites for more details.
+
+ ## TODO
+
+ - [ ] Data preprocessing
+
+ - [x] Training implementation
+
+ - [x] Colab demo release
+
+ - [x] Demo for custom videos
+
+ ## Citation
+ ```
+ @InProceedings{shin2023wham,
+ title={WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion},
+ author={Shin, Soyong and Kim, Juyong and Halilaj, Eni and Black, Michael J.},
+ booktitle={Computer Vision and Pattern Recognition (CVPR)},
+ year={2024}
+ }
+ ```
+
+ ## License
+ Please see [License](./LICENSE) for details.
+
+ ## Contact
+ Please contact [email protected] for any questions related to this work.
checkpoints/dpvo.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30d02dc2b88a321cf99aad8e4ea1152a44d791b5b65bf95ad036922819c0ff12
+ size 14167743
checkpoints/hmr2a.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2dcf79638109781d1ae5f5c44fee5f55bc83291c210653feead9b7f04fa6f20e
+ size 2709494041
checkpoints/vitpose-h-multi-coco.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50e33f4077ef2a6bcfd7110c58742b24c5859b7798fb0eedd6d2215e0a8980bc
+ size 2549075546
checkpoints/wham_vit_bedlam_w_3dpw.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91d250d2d298b00f200aa39df36253b55ca434188c2934d8e91e5e0777fb67fd
+ size 527307587
checkpoints/wham_vit_w_3dpw.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9835bcbc952221ad72fa72e768e1f4620e96788b12cecd676a3b1dbee057dd66
+ size 527307587
checkpoints/yolov8x.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4d5a3f000d771762f03fc8b57ebd0aae324aeaefdd6e68492a9c4470f2d1e8b
+ size 136867539
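Each checkpoint entry above is a Git LFS pointer recording the SHA-256 and byte size of the real file. A small sketch for verifying a downloaded checkpoint against its pointer (file name and hash taken from the `checkpoints/dpvo.pth` entry above):

```python
import hashlib

# Recompute the SHA-256 of the downloaded checkpoint and compare it with the
# "oid sha256:..." line of its LFS pointer.
with open('checkpoints/dpvo.pth', 'rb') as f:
    digest = hashlib.sha256(f.read()).hexdigest()
print(digest == '30d02dc2b88a321cf99aad8e4ea1152a44d791b5b65bf95ad036922819c0ff12')
```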
configs/__pycache__/config.cpython-39.pyc ADDED
Binary file (3.01 kB).

configs/__pycache__/constants.cpython-39.pyc ADDED
Binary file (2.77 kB).
 
configs/config.py ADDED
@@ -0,0 +1,111 @@
+ import argparse
+ from yacs.config import CfgNode as CN
+
+ # Configuration variable
+ cfg = CN()
+
+ cfg.TITLE = 'default'
+ cfg.OUTPUT_DIR = 'results'
+ cfg.EXP_NAME = 'default'
+ cfg.DEVICE = 'cuda'
+ cfg.DEBUG = False
+ cfg.EVAL = False
+ cfg.RESUME = False
+ cfg.LOGDIR = ''
+ cfg.NUM_WORKERS = 5
+ cfg.SEED_VALUE = -1
+ cfg.SUMMARY_ITER = 50
+ cfg.MODEL_CONFIG = ''
+ cfg.FLIP_EVAL = False
+
+ cfg.TRAIN = CN()
+ cfg.TRAIN.STAGE = 'stage1'
+ cfg.TRAIN.DATASET_EVAL = '3dpw'
+ cfg.TRAIN.CHECKPOINT = ''
+ cfg.TRAIN.BATCH_SIZE = 64
+ cfg.TRAIN.START_EPOCH = 0
+ cfg.TRAIN.END_EPOCH = 999
+ cfg.TRAIN.OPTIM = 'Adam'
+ cfg.TRAIN.LR = 3e-4
+ cfg.TRAIN.LR_FINETUNE = 5e-5
+ cfg.TRAIN.LR_PATIENCE = 5
+ cfg.TRAIN.LR_DECAY_RATIO = 0.1
+ cfg.TRAIN.WD = 0.0
+ cfg.TRAIN.MOMENTUM = 0.9
+ cfg.TRAIN.MILESTONES = [50, 70]
+
+ cfg.DATASET = CN()
+ cfg.DATASET.SEQLEN = 81
+ cfg.DATASET.RATIO = [1.0, 0, 0, 0, 0]
+
+ cfg.MODEL = CN()
+ cfg.MODEL.BACKBONE = 'vit'
+
+ cfg.LOSS = CN()
+ cfg.LOSS.SHAPE_LOSS_WEIGHT = 0.001
+ cfg.LOSS.JOINT2D_LOSS_WEIGHT = 5.
+ cfg.LOSS.JOINT3D_LOSS_WEIGHT = 5.
+ cfg.LOSS.VERTS3D_LOSS_WEIGHT = 1.
+ cfg.LOSS.POSE_LOSS_WEIGHT = 1.
+ cfg.LOSS.CASCADED_LOSS_WEIGHT = 0.0
+ cfg.LOSS.CONTACT_LOSS_WEIGHT = 0.04
+ cfg.LOSS.ROOT_VEL_LOSS_WEIGHT = 0.001
+ cfg.LOSS.ROOT_POSE_LOSS_WEIGHT = 0.4
+ cfg.LOSS.SLIDING_LOSS_WEIGHT = 0.5
+ cfg.LOSS.CAMERA_LOSS_WEIGHT = 0.04
+ cfg.LOSS.LOSS_WEIGHT = 60.
+ cfg.LOSS.CAMERA_LOSS_SKIP_EPOCH = 5
+
+
+ def get_cfg_defaults():
+     """Get a yacs CfgNode object with default values for my_project."""
+     # Return a clone so that the defaults will not be altered
+     # This is for the "local variable" use pattern
+     return cfg.clone()
+
+
+ def get_cfg(args, test):
+     """
+     Define configuration.
+     """
+     import os
+
+     cfg = get_cfg_defaults()
+     if os.path.exists(args.cfg):
+         cfg.merge_from_file(args.cfg)
+
+     cfg.merge_from_list(args.opts)
+     if test:
+         cfg.merge_from_list(['EVAL', True])
+
+     return cfg.clone()
+
+
+ def bool_arg(value):
+     if value.lower() in ('yes', 'true', 't', 'y', '1'):
+         return True
+     elif value.lower() in ('no', 'false', 'f', 'n', '0'):
+         return False
+
+
+ def parse_args(test=False):
+     parser = argparse.ArgumentParser()
+     parser.add_argument('-c', '--cfg', type=str, default='./configs/debug.yaml', help='cfg file path')
+     parser.add_argument(
+         "--eval-set", type=str, default='3dpw', help="Evaluation dataset")
+     parser.add_argument(
+         "--eval-split", type=str, default='test', help="Evaluation data split")
+     parser.add_argument('--render', default=False, type=bool_arg,
+                         help='Render SMPL meshes after the evaluation')
+     parser.add_argument('--save-results', default=False, type=bool_arg,
+                         help='Save SMPL parameters after the evaluation')
+     parser.add_argument(
+         "opts", default=None, nargs=argparse.REMAINDER,
+         help="Modify config options using the command-line")
+
+     args = parser.parse_args()
+     print(args, end='\n\n')
+     cfg_file = args.cfg
+     cfg = get_cfg(args, test)
+
+     return cfg, cfg_file, args
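For reference, the defaults above are combined with a YAML file and list-style overrides in the same way `demo.py` and `get_cfg` do it; a short illustrative sketch:

```python
from configs.config import get_cfg_defaults

cfg = get_cfg_defaults()                         # start from the defaults defined above
cfg.merge_from_file('configs/yamls/demo.yaml')   # layer a YAML config on top
cfg.merge_from_list(['TRAIN.BATCH_SIZE', 1])     # command-line-style key/value overrides
print(cfg.TRAIN.CHECKPOINT)
```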
configs/constants.py ADDED
@@ -0,0 +1,59 @@
+ from __future__ import absolute_import
+ from __future__ import print_function
+ from __future__ import division
+
+ import torch
+
+ IMG_FEAT_DIM = {
+     'resnet': 2048,
+     'vit': 1024
+ }
+
+ N_JOINTS = 17
+ root = 'dataset'
+ class PATHS:
+     # Raw data folders
+     PARSED_DATA = f'{root}/parsed_data'
+     AMASS_PTH = f'{root}/AMASS'
+     THREEDPW_PTH = f'{root}/3DPW'
+     HUMAN36M_PTH = f'{root}/Human36M'
+     RICH_PTH = f'{root}/RICH'
+     EMDB_PTH = f'{root}/EMDB'
+
+     # Processed labels
+     AMASS_LABEL = f'{root}/parsed_data/amass.pth'
+     THREEDPW_LABEL = f'{root}/parsed_data/3dpw_dset_backbone.pth'
+     MPII3D_LABEL = f'{root}/parsed_data/mpii3d_dset_backbone.pth'
+     HUMAN36M_LABEL = f'{root}/parsed_data/human36m_dset_backbone.pth'
+     INSTA_LABEL = f'{root}/parsed_data/insta_dset_backbone.pth'
+     BEDLAM_LABEL = f'{root}/parsed_data/bedlam_train_backbone.pth'
+
+ class KEYPOINTS:
+     NUM_JOINTS = N_JOINTS
+     H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9]
+     H36M_TO_J14 = H36M_TO_J17[:14]
+     J17_TO_H36M = [14, 3, 4, 5, 2, 1, 0, 15, 12, 16, 13, 9, 10, 11, 8, 7, 6]
+     COCO_AUG_DICT = f'{root}/body_models/coco_aug_dict.pth'
+     TREE = [[5, 6], 0, 0, 1, 2, -1, -1, 5, 6, 7, 8, -1, -1, 11, 12, 13, 14, 15, 15, 15, 16, 16, 16]
+
+     # STD scale for video noise
+     S_BIAS = 1e-1
+     S_JITTERING = 5e-2
+     S_PEAK = 3e-1
+     S_PEAK_MASK = 5e-3
+     S_MASK = 0.03
+
+
+ class BMODEL:
+     MAIN_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]  # reduced_joints
+
+     FLDR = f'{root}/body_models/smpl/'
+     SMPLX2SMPL = f'{root}/body_models/smplx2smpl.pkl'
+     FACES = f'{root}/body_models/smpl_faces.npy'
+     MEAN_PARAMS = f'{root}/body_models/smpl_mean_params.npz'
+     JOINTS_REGRESSOR_WHAM = f'{root}/body_models/J_regressor_wham.npy'
+     JOINTS_REGRESSOR_H36M = f'{root}/body_models/J_regressor_h36m.npy'
+     JOINTS_REGRESSOR_EXTRA = f'{root}/body_models/J_regressor_extra.npy'
+     JOINTS_REGRESSOR_FEET = f'{root}/body_models/J_regressor_feet.npy'
+     PARENTS = torch.tensor([
+         -1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 16, 17, 18, 19, 20, 21])
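A small usage sketch for the path constants above (the `_C` alias and the shape comment are illustrative; the paths are relative, so this assumes running from the repository root):

```python
import numpy as np
from configs import constants as _C

# Load the WHAM joint regressor that ships with this PR.
J_regressor = np.load(_C.BMODEL.JOINTS_REGRESSOR_WHAM)
print(J_regressor.shape)  # (num_joints, num_smpl_vertices)
```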
configs/yamls/demo.yaml ADDED
@@ -0,0 +1,14 @@
+ LOGDIR: ''
+ DEVICE: 'cuda'
+ EXP_NAME: 'demo'
+ OUTPUT_DIR: 'experiments/'
+ NUM_WORKERS: 0
+ MODEL_CONFIG: 'configs/yamls/model_base.yaml'
+ FLIP_EVAL: True
+
+ TRAIN:
+   STAGE: 'stage2'
+   CHECKPOINT: 'checkpoints/wham_vit_bedlam_w_3dpw.pth.tar'
+
+ MODEL:
+   BACKBONE: 'vit'
configs/yamls/model_base.yaml ADDED
@@ -0,0 +1,7 @@
+ architecture: 'RNN'
+ in_dim: 49
+ n_iters: 1
+ pose_dr: 0.15
+ d_embed: 512
+ n_layers: 3
+ layer: 'LSTM'
configs/yamls/stage1.yaml ADDED
@@ -0,0 +1,28 @@
+ LOGDIR: ''
+ DEVICE: 'cuda'
+ EXP_NAME: 'train_stage1'
+ OUTPUT_DIR: 'experiments/'
+ NUM_WORKERS: 8
+ MODEL_CONFIG: 'configs/yamls/model_base.yaml'
+ FLIP_EVAL: True
+ SEED_VALUE: 42
+
+ TRAIN:
+   LR: 5e-4
+   BATCH_SIZE: 64
+   END_EPOCH: 100
+   STAGE: 'stage1'
+   CHECKPOINT: ''
+   MILESTONES: [60, 80]
+
+ LOSS:
+   SHAPE_LOSS_WEIGHT: 0.004
+   JOINT3D_LOSS_WEIGHT: 0.4
+   JOINT2D_LOSS_WEIGHT: 0.1
+   POSE_LOSS_WEIGHT: 8.0
+   CASCADED_LOSS_WEIGHT: 0.0
+   SLIDING_LOSS_WEIGHT: 0.5
+   CAMERA_LOSS_WEIGHT: 0.04
+   ROOT_VEL_LOSS_WEIGHT: 0.001
+   LOSS_WEIGHT: 50.0
+   CAMERA_LOSS_SKIP_EPOCH: 5
configs/yamls/stage2.yaml ADDED
@@ -0,0 +1,37 @@
+ LOGDIR: ''
+ DEVICE: 'cuda'
+ EXP_NAME: 'train_stage2'
+ OUTPUT_DIR: 'experiments'
+ NUM_WORKERS: 8
+ MODEL_CONFIG: 'configs/yamls/model_base.yaml'
+ FLIP_EVAL: True
+ SEED_VALUE: 42
+
+ TRAIN:
+   LR: 1e-4
+   LR_FINETUNE: 1e-5
+   STAGE: 'stage2'
+   CHECKPOINT: 'checkpoints/wham_stage1.pth.tar'
+   BATCH_SIZE: 64
+   END_EPOCH: 40
+   MILESTONES: [20, 30]
+   LR_DECAY_RATIO: 0.2
+
+ MODEL:
+   BACKBONE: 'vit'
+
+ LOSS:
+   SHAPE_LOSS_WEIGHT: 0.0
+   JOINT2D_LOSS_WEIGHT: 3.0
+   JOINT3D_LOSS_WEIGHT: 6.0
+   POSE_LOSS_WEIGHT: 1.0
+   CASCADED_LOSS_WEIGHT: 0.05
+   SLIDING_LOSS_WEIGHT: 0.5
+   CAMERA_LOSS_WEIGHT: 0.01
+   ROOT_VEL_LOSS_WEIGHT: 0.001
+   LOSS_WEIGHT: 60.0
+   CAMERA_LOSS_SKIP_EPOCH: 0
+
+ DATASET:
+   SEQLEN: 81
+   RATIO: [0.2, 0.2, 0.2, 0.2, 0.2]
configs/yamls/stage2_b.yaml ADDED
@@ -0,0 +1,38 @@
+ LOGDIR: ''
+ DEVICE: 'cuda'
+ EXP_NAME: 'train_stage2_b'
+ OUTPUT_DIR: 'experiments'
+ NUM_WORKERS: 8
+ MODEL_CONFIG: 'configs/yamls/model_base.yaml'
+ FLIP_EVAL: True
+ SEED_VALUE: 42
+
+ TRAIN:
+   LR: 1e-4
+   LR_FINETUNE: 1e-5
+   STAGE: 'stage2'
+   CHECKPOINT: 'checkpoints/wham_stage1.pth.tar'
+   BATCH_SIZE: 64
+   END_EPOCH: 80
+   MILESTONES: [40, 50, 70]
+   LR_DECAY_RATIO: 0.2
+
+ MODEL:
+   BACKBONE: 'vit'
+
+ LOSS:
+   SHAPE_LOSS_WEIGHT: 0.0
+   JOINT2D_LOSS_WEIGHT: 5.0
+   JOINT3D_LOSS_WEIGHT: 5.0
+   VERTS3D_LOSS_WEIGHT: 1.0
+   POSE_LOSS_WEIGHT: 3.0
+   CASCADED_LOSS_WEIGHT: 0.05
+   SLIDING_LOSS_WEIGHT: 0.5
+   CAMERA_LOSS_WEIGHT: 0.01
+   ROOT_VEL_LOSS_WEIGHT: 0.001
+   LOSS_WEIGHT: 60.0
+   CAMERA_LOSS_SKIP_EPOCH: 0
+
+ DATASET:
+   SEQLEN: 81
+   RATIO: [0.2, 0.2, 0.2, 0.2, 0.0, 0.2]
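The nested keys in these YAML files can also be overridden from the command line through the `opts` remainder argument defined in `configs/config.py`, as the README's stage-2 command does; for example (the batch-size override is shown only as an illustration):

```bash
# Override the stage-2 checkpoint path and batch size without editing the YAML
python train.py --cfg configs/yamls/stage2.yaml \
    TRAIN.CHECKPOINT checkpoints/wham_stage1.pth.tar \
    TRAIN.BATCH_SIZE 32
```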
dataset/body_models/J_regressor_coco.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0cd49241810715e752aa7384363b7bc09fb96b386ca99aa1c3eb2c0d15d6b8b9
+ size 468648
dataset/body_models/J_regressor_feet.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ef9e6d64796f2f342983a9fde6a6d9f8e3544f1239e7f86aa4f6b7aa82f4cf6
+ size 220608
dataset/body_models/J_regressor_h36m.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c655cd7013d7829eb9acbebf0e43f952a3fa0305a53c35880e39192bfb6444a0
+ size 937168
dataset/body_models/J_regressor_wham.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f938dcfd5cd88d0b19ee34e442d49f1dc370d3d8c4f5aef57a93d0cf2e267c4c
+ size 854488
dataset/body_models/smpl/SMPL_FEMALE.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a583c1b98e4afc19042641f1bae5cd8a1f712a6724886291a7627ec07acd408d
+ size 39056454
dataset/body_models/smpl/SMPL_MALE.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e8c0bbbbc635dcb166ed29c303fb4bef16ea5f623e5a89263495a9e403575bd
+ size 39056404
dataset/body_models/smpl/SMPL_NEUTRAL.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:98e65c74ad9b998783132f00880d1025a8d64b158e040e6ef13a557e5098bc42
+ size 39001280
dataset/body_models/smpl/__MACOSX/._smpl ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/._.DS_Store ADDED
Binary file (120 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/.___init__.py ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/._models ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/._smpl_webuser ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/models/basicModel_f_lbs_10_207_0_v1.0.0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a583c1b98e4afc19042641f1bae5cd8a1f712a6724886291a7627ec07acd408d
+ size 39056454

dataset/body_models/smpl/__MACOSX/smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e8c0bbbbc635dcb166ed29c303fb4bef16ea5f623e5a89263495a9e403575bd
+ size 39056404

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._LICENSE.txt ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._README.txt ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/.___init__.py ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._hello_world ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._lbs.py ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._posemapper.py ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._serialization.py ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._verts.py ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._hello_smpl.py ADDED
Binary file (239 Bytes).

dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._render_smpl.py ADDED
Binary file (239 Bytes).
 
dataset/body_models/smpl_faces.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51fc11ebadb0487d74bef220c4eea43f014609249f0121413c1fc629d859fecb
+ size 165392
dataset/body_models/smpl_mean_params.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fd6dd687800da946d0a0492383f973b92ec20f166a0b829775882868c35fcdd
+ size 1310
dataset/body_models/smplx2smpl.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1d912d121ad98132e4492d8e7a0f1a8cf4412811e14a7ef8cb337bb48eef99e
+ size 578019251
demo.py ADDED
@@ -0,0 +1,234 @@
+ import os
+ import argparse
+ import os.path as osp
+ from glob import glob
+ from collections import defaultdict
+
+ import cv2
+ import torch
+ import joblib
+ import numpy as np
+ from loguru import logger
+ from progress.bar import Bar
+
+ from configs.config import get_cfg_defaults
+ from lib.data.datasets import CustomDataset
+ from lib.utils.imutils import avg_preds
+ from lib.utils.transforms import matrix_to_axis_angle
+ from lib.models import build_network, build_body_model
+ from lib.models.preproc.detector import DetectionModel
+ from lib.models.preproc.extractor import FeatureExtractor
+ from lib.models.smplify import TemporalSMPLify
+
+ try:
+     from lib.models.preproc.slam import SLAMModel
+     _run_global = True
+ except:
+     logger.info('DPVO is not properly installed. Only estimating in local coordinates!')
+     _run_global = False
+
+ def run(cfg,
+         video,
+         output_pth,
+         network,
+         calib=None,
+         run_global=True,
+         save_pkl=False,
+         visualize=False):
+
+     cap = cv2.VideoCapture(video)
+     assert cap.isOpened(), f'Failed to load video file {video}'
+     fps = cap.get(cv2.CAP_PROP_FPS)
+     length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     width, height = cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+
+     # Whether to estimate motion in global coordinates
+     run_global = run_global and _run_global
+
+     # Preprocess
+     with torch.no_grad():
+         if not (osp.exists(osp.join(output_pth, 'tracking_results.pth')) and
+                 osp.exists(osp.join(output_pth, 'slam_results.pth'))):
+
+             detector = DetectionModel(cfg.DEVICE.lower())
+             extractor = FeatureExtractor(cfg.DEVICE.lower(), cfg.FLIP_EVAL)
+
+             if run_global: slam = SLAMModel(video, output_pth, width, height, calib)
+             else: slam = None
+
+             bar = Bar('Preprocess: 2D detection and SLAM', fill='#', max=length)
+             while (cap.isOpened()):
+                 flag, img = cap.read()
+                 if not flag: break
+
+                 # 2D detection and tracking
+                 detector.track(img, fps, length)
+
+                 # SLAM
+                 if slam is not None:
+                     slam.track()
+
+                 bar.next()
+
+             tracking_results = detector.process(fps)
+
+             if slam is not None:
+                 slam_results = slam.process()
+             else:
+                 slam_results = np.zeros((length, 7))
+                 slam_results[:, 3] = 1.0    # Unit quaternion
+
+             # Extract image features
+             # TODO: Merge this into the previous while loop with an online bbox smoothing.
+             tracking_results = extractor.run(video, tracking_results)
+             logger.info('Complete Data preprocessing!')
+
+             # Save the processed data
+             joblib.dump(tracking_results, osp.join(output_pth, 'tracking_results.pth'))
+             joblib.dump(slam_results, osp.join(output_pth, 'slam_results.pth'))
+             logger.info(f'Save processed data at {output_pth}')
+
+         # If the processed data already exists, load the processed data
+         else:
+             tracking_results = joblib.load(osp.join(output_pth, 'tracking_results.pth'))
+             slam_results = joblib.load(osp.join(output_pth, 'slam_results.pth'))
+             logger.info(f'Already processed data exists at {output_pth}! Loading the data.')
+
+     # Build dataset
+     dataset = CustomDataset(cfg, tracking_results, slam_results, width, height, fps)
+
+     # run WHAM
+     results = defaultdict(dict)
+
+     n_subjs = len(dataset)
+     for subj in range(n_subjs):
+
+         with torch.no_grad():
+             if cfg.FLIP_EVAL:
+                 # Forward pass with flipped input
+                 flipped_batch = dataset.load_data(subj, True)
+                 _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = flipped_batch
+                 flipped_pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)
+
+                 # Forward pass with normal input
+                 batch = dataset.load_data(subj)
+                 _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = batch
+                 pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)
+
+                 # Merge two predictions
+                 flipped_pose, flipped_shape = flipped_pred['pose'].squeeze(0), flipped_pred['betas'].squeeze(0)
+                 pose, shape = pred['pose'].squeeze(0), pred['betas'].squeeze(0)
+                 flipped_pose, pose = flipped_pose.reshape(-1, 24, 6), pose.reshape(-1, 24, 6)
+                 avg_pose, avg_shape = avg_preds(pose, shape, flipped_pose, flipped_shape)
+                 avg_pose = avg_pose.reshape(-1, 144)
+                 avg_contact = (flipped_pred['contact'][..., [2, 3, 0, 1]] + pred['contact']) / 2
+
+                 # Refine trajectory with merged prediction
+                 network.pred_pose = avg_pose.view_as(network.pred_pose)
+                 network.pred_shape = avg_shape.view_as(network.pred_shape)
+                 network.pred_contact = avg_contact.view_as(network.pred_contact)
+                 output = network.forward_smpl(**kwargs)
+                 pred = network.refine_trajectory(output, cam_angvel, return_y_up=True)
+
+             else:
+                 # data
+                 batch = dataset.load_data(subj)
+                 _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = batch
+
+                 # inference
+                 pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)
+
+         if args.run_smplify:
+             smplify = TemporalSMPLify(smpl, img_w=width, img_h=height, device=cfg.DEVICE)
+             input_keypoints = dataset.tracking_results[_id]['keypoints']
+             pred = smplify.fit(pred, input_keypoints, **kwargs)
+
+             with torch.no_grad():
+                 network.pred_pose = pred['pose']
+                 network.pred_shape = pred['betas']
+                 network.pred_cam = pred['cam']
+                 output = network.forward_smpl(**kwargs)
+                 pred = network.refine_trajectory(output, cam_angvel, return_y_up=True)
+
+         # ========= Store results ========= #
+         pred_body_pose = matrix_to_axis_angle(pred['poses_body']).cpu().numpy().reshape(-1, 69)
+         pred_root = matrix_to_axis_angle(pred['poses_root_cam']).cpu().numpy().reshape(-1, 3)
+         pred_root_world = matrix_to_axis_angle(pred['poses_root_world']).cpu().numpy().reshape(-1, 3)
+         pred_pose = np.concatenate((pred_root, pred_body_pose), axis=-1)
+         pred_pose_world = np.concatenate((pred_root_world, pred_body_pose), axis=-1)
+         pred_trans = (pred['trans_cam'] - network.output.offset).cpu().numpy()
+
+         results[_id]['pose'] = pred_pose
+         results[_id]['trans'] = pred_trans
+         results[_id]['pose_world'] = pred_pose_world
+         results[_id]['trans_world'] = pred['trans_world'].cpu().squeeze(0).numpy()
+         results[_id]['betas'] = pred['betas'].cpu().squeeze(0).numpy()
+         results[_id]['verts'] = (pred['verts_cam'] + pred['trans_cam'].unsqueeze(1)).cpu().numpy()
+         results[_id]['frame_ids'] = frame_id
+
+     if save_pkl:
+         joblib.dump(results, osp.join(output_pth, "wham_output.pkl"))
+
+     # Visualize
+     if visualize:
+         from lib.vis.run_vis import run_vis_on_demo
+         with torch.no_grad():
+             run_vis_on_demo(cfg, video, results, output_pth, network.smpl, vis_global=run_global)
+
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser()
+
+     parser.add_argument('--video', type=str,
+                         default='examples/demo_video.mp4',
+                         help='input video path or youtube link')
+
+     parser.add_argument('--output_pth', type=str, default='output/demo',
+                         help='output folder to write results')
+
+     parser.add_argument('--calib', type=str, default=None,
+                         help='Camera calibration file path')
+
+     parser.add_argument('--estimate_local_only', action='store_true',
+                         help='Only estimate motion in camera coordinate if True')
+
+     parser.add_argument('--visualize', action='store_true',
+                         help='Visualize the output mesh if True')
+
+     parser.add_argument('--save_pkl', action='store_true',
+                         help='Save output as pkl file')
+
+     parser.add_argument('--run_smplify', action='store_true',
+                         help='Run Temporal SMPLify for post processing')
+
+     args = parser.parse_args()
+
+     cfg = get_cfg_defaults()
+     cfg.merge_from_file('configs/yamls/demo.yaml')
+
+     logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
+     logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')
+
+     # ========= Load WHAM ========= #
+     smpl_batch_size = cfg.TRAIN.BATCH_SIZE * cfg.DATASET.SEQLEN
+     smpl = build_body_model(cfg.DEVICE, smpl_batch_size)
+     network = build_network(cfg, smpl)
+     network.eval()
+
+     # Output folder
+     sequence = '.'.join(args.video.split('/')[-1].split('.')[:-1])
+     output_pth = osp.join(args.output_pth, sequence)
+     os.makedirs(output_pth, exist_ok=True)
+
+     run(cfg,
+         args.video,
+         output_pth,
+         network,
+         args.calib,
+         run_global=not args.estimate_local_only,
+         save_pkl=args.save_pkl,
+         visualize=args.visualize)
+
+     print()
+     logger.info('Done!')
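When `--save_pkl` is passed, `demo.py` dumps the per-subject `results` dictionary to `wham_output.pkl`. A short sketch of loading it back (the output path assumes the default `--output_pth` and the `IMG_9732.mov` example; key names follow the assignments above):

```python
import joblib

results = joblib.load('output/demo/IMG_9732/wham_output.pkl')
for subj_id, res in results.items():
    # 'pose' is (N, 72) axis-angle SMPL pose, 'trans' the per-frame camera-space translation
    print(subj_id, res['pose'].shape, res['trans'].shape, res['frame_ids'][:5])
```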
docs/API.md ADDED
@@ -0,0 +1,18 @@
+ ## Python API
+
+ To use the Python API of WHAM, please finish the basic installation first ([Installation](INSTALL.md) or [Docker](DOCKER.md)).
+
+ If you use the Docker environment, please run:
+
+ ```bash
+ cd /path/to/WHAM
+ docker run -it -v .:/code/ --rm yusun9/wham-vitpose-dpvo-cuda11.3-python3.9 python
+ ```
+
+ Then you can run WHAM via Python code like:
+ ```python
+ from wham_api import WHAM_API
+ wham_model = WHAM_API()
+ input_video_path = 'examples/IMG_9732.mov'
+ results, tracking_results, slam_results = wham_model(input_video_path)
+ ```
docs/DATASET.md ADDED
@@ -0,0 +1,42 @@
+ # Dataset
+
+ ## Training Data
+ We use the [AMASS](https://amass.is.tue.mpg.de/), [InstaVariety](https://github.com/akanazawa/human_dynamics/blob/master/doc/insta_variety.md), [MPI-INF-3DHP](https://vcai.mpi-inf.mpg.de/3dhp-dataset/), [Human3.6M](http://vision.imar.ro/human3.6m/description.php), and [3DPW](https://virtualhumans.mpi-inf.mpg.de/3DPW/) datasets for training. Please register on their websites to download and process the data. You can download the parsed ViT version of the InstaVariety, MPI-INF-3DHP, Human3.6M, and 3DPW data from [Google Drive](https://drive.google.com/drive/folders/13T2ghVvrw_fEk3X-8L0e6DVSYx_Og8o3?usp=sharing) and save it under the `dataset/parsed_data` folder.
+
+ ### Process AMASS dataset
+ After downloading the AMASS dataset, you can process it by running:
+ ```bash
+ python -m lib.data_utils.amass_utils
+ ```
+ The processed data will be stored at `dataset/parsed_data/amass.pth`.
+
+ ### Process 3DPW, MPII3D, Human3.6M, and InstaVariety datasets
+ First, visit [TCMR](https://github.com/hongsukchoi/TCMR_RELEASE) and download the preprocessed data to `dataset/parsed_data/TCMR_preproc/`.
+
+ Next, prepare 2D keypoint detections using [ViTPose](https://github.com/ViTAE-Transformer/ViTPose) and store the results at `dataset/detection_results/<DATASET-NAME>/<SEQUENCE-NAME>.npy`. You may need to download all images to prepare the detection results.
+
+ For the Human3.6M, MPII3D, and InstaVariety datasets, you also need to download the [NeuralAnnot](https://github.com/mks0601/NeuralAnnot_RELEASE) pseudo-ground-truth SMPL labels. As mentioned in our paper, we do not supervise WHAM on these labels, but use them for the neural initialization step.
+
+ Finally, run the following commands to preprocess all training data.
+ ```bash
+ python -m lib.data_utils.threedpw_train_utils # 3DPW dataset
+ # [Coming] python -m lib.data_utils.human36m_train_utils # Human3.6M dataset
+ # [Coming] python -m lib.data_utils.mpii3d_train_utils # MPI-INF-3DHP dataset
+ # [Coming] python -m lib.data_utils.insta_train_utils # InstaVariety dataset
+ ```
+
+ ### Process BEDLAM dataset
+ Will be updated.
+
+ ## Evaluation Data
+ We use [3DPW](https://virtualhumans.mpi-inf.mpg.de/3DPW/), [RICH](https://rich.is.tue.mpg.de/), and [EMDB](https://eth-ait.github.io/emdb/) for the evaluation, and we provide the parsed data for it. Please download the data from [Google Drive](https://drive.google.com/drive/folders/13T2ghVvrw_fEk3X-8L0e6DVSYx_Og8o3?usp=sharing) and place it at `dataset/parsed_data/`.
+
+ To process the data at your end, please
+ 1) Download the parsed 3DPW data from [TCMR](https://github.com/hongsukchoi/TCMR_RELEASE) and store it at `dataset/parsed_data/TCMR_preproc/`.
+ 2) Run [ViTPose](https://github.com/ViTAE-Transformer/ViTPose) on all test data and store the results at `dataset/detection_results/<DATASET-NAME>`.
+ 3) Run the following commands.
+ ```bash
+ python -m lib.data_utils.threedpw_eval_utils --split <"val" or "test"> # 3DPW dataset
+ python -m lib.data_utils.emdb_eval_utils --split <"1" or "2"> # EMDB dataset
+ python -m lib.data_utils.rich_eval_utils # RICH dataset
+ ```
docs/DOCKER.md ADDED
@@ -0,0 +1,23 @@
+ ## Installation
+
+ ### Prerequisites
+ 1. Please make sure that you have properly installed [Docker](https://www.docker.com/) and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) before installation.
+
+ 2. Please prepare the essential data for inference:
+    To download the SMPL body models (Neutral, Female, and Male), you need to register for [SMPL](https://smpl.is.tue.mpg.de/) and [SMPLify](https://smplify.is.tue.mpg.de/). The username and password for both homepages will be used while fetching the demo data.
+    Next, run the following script to fetch the demo data. This script will download all the required dependencies, including trained models and demo videos.
+    ```bash
+    bash fetch_demo_data.sh
+    ```
+
+ ### Usage
+ 1. Pull the docker image from Docker Hub:
+    ```bash
+    docker pull yusun9/wham-vitpose-dpvo-cuda11.3-python3.9:latest
+    ```
+
+ 2. Run the code in the Docker environment:
+    ```bash
+    cd /path/to/WHAM
+    docker run -v .:/code/ --rm yusun9/wham-vitpose-dpvo-cuda11.3-python3.9 python demo.py --video examples/IMG_9732.mov
+    ```
docs/INSTALL.md ADDED
@@ -0,0 +1,38 @@
+ # Installation
+
+ WHAM has been implemented and tested on Ubuntu 20.04 and 22.04 with Python 3.9. We provide an [anaconda](https://www.anaconda.com/) environment to run WHAM, as below.
+
+ ```bash
+ # Clone the repo
+ git clone https://github.com/yohanshin/WHAM.git --recursive
+ cd WHAM/
+
+ # Create Conda environment
+ conda create -n wham python=3.9
+ conda activate wham
+
+ # Install PyTorch libraries
+ conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch
+
+ # Install PyTorch3D (optional) for visualization
+ conda install -c fvcore -c iopath -c conda-forge fvcore iopath
+ pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py39_cu113_pyt1110/download.html
+
+ # Install WHAM dependencies
+ pip install -r requirements.txt
+
+ # Install ViTPose
+ pip install -v -e third-party/ViTPose
+
+ # Install DPVO
+ cd third-party/DPVO
+ wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
+ unzip eigen-3.4.0.zip -d thirdparty && rm -rf eigen-3.4.0.zip
+ conda install pytorch-scatter=2.0.9 -c rusty1s
+ conda install cudatoolkit-dev=11.3.1 -c conda-forge
+
+ # ONLY IF your GCC version is greater than 10
+ conda install -c conda-forge gxx=9.5
+
+ pip install .
+ ```
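As an optional sanity check after the steps above (not part of the original instructions), confirm that the expected PyTorch build is active and sees the GPU:

```bash
# Should report torch 1.11.0, CUDA 11.3, and True on a working GPU setup
python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"
```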