This view is limited to 50 files because it contains too many changes.
- .gitattributes +4 -0
- checkpoints/dpvo.pth +3 -0
- checkpoints/wham_vit_bedlam_w_3dpw.pth.tar +3 -0
- checkpoints/wham_vit_w_3dpw.pth.tar +3 -0
- checkpoints/yolov8x.pt +3 -0
- configs/__pycache__/config.cpython-39.pyc +0 -0
- configs/__pycache__/constants.cpython-39.pyc +0 -0
- configs/config.py +111 -0
- configs/constants.py +59 -0
- configs/yamls/demo.yaml +14 -0
- configs/yamls/model_base.yaml +7 -0
- configs/yamls/stage1.yaml +28 -0
- configs/yamls/stage2.yaml +37 -0
- configs/yamls/stage2_b.yaml +38 -0
- dataset/body_models/J_regressor_coco.npy +3 -0
- dataset/body_models/J_regressor_feet.npy +3 -0
- dataset/body_models/J_regressor_h36m.npy +3 -0
- dataset/body_models/J_regressor_wham.npy +3 -0
- dataset/body_models/smpl/SMPL_FEMALE.pkl +3 -0
- dataset/body_models/smpl/SMPL_MALE.pkl +3 -0
- dataset/body_models/smpl/SMPL_NEUTRAL.pkl +3 -0
- dataset/body_models/smpl/__MACOSX/._smpl +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/._.DS_Store +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/.___init__.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/._models +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/._smpl_webuser +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/models/basicModel_f_lbs_10_207_0_v1.0.0.pkl +3 -0
- dataset/body_models/smpl/__MACOSX/smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl +3 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._LICENSE.txt +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._README.txt +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/.___init__.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._hello_world +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._lbs.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._posemapper.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._serialization.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._verts.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._hello_smpl.py +0 -0
- dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._render_smpl.py +0 -0
- dataset/body_models/smpl_faces.npy +3 -0
- dataset/body_models/smpl_mean_params.npz +3 -0
- dataset/body_models/smplx2smpl.pkl +3 -0
- demo.py +234 -0
- docs/API.md +18 -0
- docs/DATASET.md +42 -0
- docs/DOCKER.md +23 -0
- docs/INSTALL.md +38 -0
- examples/IMG_9730.mov +3 -0
- examples/IMG_9731.mov +3 -0
- examples/IMG_9732.mov +3 -0
- examples/drone_calib.txt +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/drone_video.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/IMG_9730.mov filter=lfs diff=lfs merge=lfs -text
+examples/IMG_9731.mov filter=lfs diff=lfs merge=lfs -text
+examples/IMG_9732.mov filter=lfs diff=lfs merge=lfs -text
checkpoints/dpvo.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30d02dc2b88a321cf99aad8e4ea1152a44d791b5b65bf95ad036922819c0ff12
size 14167743
checkpoints/wham_vit_bedlam_w_3dpw.pth.tar
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:91d250d2d298b00f200aa39df36253b55ca434188c2934d8e91e5e0777fb67fd
size 527307587
checkpoints/wham_vit_w_3dpw.pth.tar
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9835bcbc952221ad72fa72e768e1f4620e96788b12cecd676a3b1dbee057dd66
size 527307587
checkpoints/yolov8x.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4d5a3f000d771762f03fc8b57ebd0aae324aeaefdd6e68492a9c4470f2d1e8b
size 136867539
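Editor's note: the four checkpoint entries above (like the body-model and example-video entries further down) are Git LFS pointer files, not the binaries themselves; the real files are fetched with `git lfs pull` after cloning. A small, hypothetical sanity check (the helper name is ours, not part of this repo) for verifying whether a checkpoint has been materialized or is still a pointer:

```python
from pathlib import Path

# Git LFS pointer files start with this header, as shown in the diff above.
LFS_HEADER = b"version https://git-lfs.github.com/spec/v1"

def is_lfs_pointer(path: str) -> bool:
    """Return True if the file still holds a Git LFS pointer instead of the real binary."""
    p = Path(path)
    # Pointer files are tiny text stubs; the real checkpoints are tens to hundreds of MB.
    if p.stat().st_size > 1024:
        return False
    return p.read_bytes().startswith(LFS_HEADER)

if __name__ == "__main__":
    for ckpt in ["checkpoints/dpvo.pth",
                 "checkpoints/wham_vit_bedlam_w_3dpw.pth.tar",
                 "checkpoints/yolov8x.pt"]:
        print(ckpt, "-> still an LFS pointer" if is_lfs_pointer(ckpt) else "-> real file")
```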
configs/__pycache__/config.cpython-39.pyc
ADDED
Binary file (3.01 kB)
configs/__pycache__/constants.cpython-39.pyc
ADDED
Binary file (2.77 kB)
configs/config.py
ADDED
@@ -0,0 +1,111 @@
import argparse
from yacs.config import CfgNode as CN

# Configuration variable
cfg = CN()

cfg.TITLE = 'default'
cfg.OUTPUT_DIR = 'results'
cfg.EXP_NAME = 'default'
cfg.DEVICE = 'cuda'
cfg.DEBUG = False
cfg.EVAL = False
cfg.RESUME = False
cfg.LOGDIR = ''
cfg.NUM_WORKERS = 5
cfg.SEED_VALUE = -1
cfg.SUMMARY_ITER = 50
cfg.MODEL_CONFIG = ''
cfg.FLIP_EVAL = False

cfg.TRAIN = CN()
cfg.TRAIN.STAGE = 'stage1'
cfg.TRAIN.DATASET_EVAL = '3dpw'
cfg.TRAIN.CHECKPOINT = ''
cfg.TRAIN.BATCH_SIZE = 64
cfg.TRAIN.START_EPOCH = 0
cfg.TRAIN.END_EPOCH = 999
cfg.TRAIN.OPTIM = 'Adam'
cfg.TRAIN.LR = 3e-4
cfg.TRAIN.LR_FINETUNE = 5e-5
cfg.TRAIN.LR_PATIENCE = 5
cfg.TRAIN.LR_DECAY_RATIO = 0.1
cfg.TRAIN.WD = 0.0
cfg.TRAIN.MOMENTUM = 0.9
cfg.TRAIN.MILESTONES = [50, 70]

cfg.DATASET = CN()
cfg.DATASET.SEQLEN = 81
cfg.DATASET.RATIO = [1.0, 0, 0, 0, 0]

cfg.MODEL = CN()
cfg.MODEL.BACKBONE = 'vit'

cfg.LOSS = CN()
cfg.LOSS.SHAPE_LOSS_WEIGHT = 0.001
cfg.LOSS.JOINT2D_LOSS_WEIGHT = 5.
cfg.LOSS.JOINT3D_LOSS_WEIGHT = 5.
cfg.LOSS.VERTS3D_LOSS_WEIGHT = 1.
cfg.LOSS.POSE_LOSS_WEIGHT = 1.
cfg.LOSS.CASCADED_LOSS_WEIGHT = 0.0
cfg.LOSS.CONTACT_LOSS_WEIGHT = 0.04
cfg.LOSS.ROOT_VEL_LOSS_WEIGHT = 0.001
cfg.LOSS.ROOT_POSE_LOSS_WEIGHT = 0.4
cfg.LOSS.SLIDING_LOSS_WEIGHT = 0.5
cfg.LOSS.CAMERA_LOSS_WEIGHT = 0.04
cfg.LOSS.LOSS_WEIGHT = 60.
cfg.LOSS.CAMERA_LOSS_SKIP_EPOCH = 5


def get_cfg_defaults():
    """Get a yacs CfgNode object with default values for my_project."""
    # Return a clone so that the defaults will not be altered
    # This is for the "local variable" use pattern
    return cfg.clone()


def get_cfg(args, test):
    """
    Define configuration.
    """
    import os

    cfg = get_cfg_defaults()
    if os.path.exists(args.cfg):
        cfg.merge_from_file(args.cfg)

    cfg.merge_from_list(args.opts)
    if test:
        cfg.merge_from_list(['EVAL', True])

    return cfg.clone()


def bool_arg(value):
    if value.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif value.lower() in ('no', 'false', 'f', 'n', '0'):
        return False


def parse_args(test=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--cfg', type=str, default='./configs/debug.yaml', help='cfg file path')
    parser.add_argument(
        "--eval-set", type=str, default='3dpw', help="Evaluation dataset")
    parser.add_argument(
        "--eval-split", type=str, default='test', help="Evaluation data split")
    parser.add_argument('--render', default=False, type=bool_arg,
                        help='Render SMPL meshes after the evaluation')
    parser.add_argument('--save-results', default=False, type=bool_arg,
                        help='Save SMPL parameters after the evaluation')
    parser.add_argument(
        "opts", default=None, nargs=argparse.REMAINDER,
        help="Modify config options using the command-line")

    args = parser.parse_args()
    print(args, end='\n\n')
    cfg_file = args.cfg
    cfg = get_cfg(args, test)

    return cfg, cfg_file, args
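Editor's note: `get_cfg_defaults()`, `merge_from_file()`, and `merge_from_list()` follow the standard yacs pattern, so a script can build the same configuration `demo.py` uses without going through `parse_args()`. A minimal sketch, assuming yacs is installed and the repo root is the working directory:

```python
from configs.config import get_cfg_defaults

# Start from the defaults defined above, then layer a YAML file and
# command-line-style overrides on top, exactly as get_cfg() does.
cfg = get_cfg_defaults()
cfg.merge_from_file('configs/yamls/demo.yaml')                   # YAML overrides
cfg.merge_from_list(['TRAIN.BATCH_SIZE', 1, 'DEVICE', 'cuda'])   # list-style overrides

print(cfg.TRAIN.CHECKPOINT)   # 'checkpoints/wham_vit_bedlam_w_3dpw.pth.tar'
print(cfg.MODEL.BACKBONE)     # 'vit'
```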
configs/constants.py
ADDED
@@ -0,0 +1,59 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import torch

IMG_FEAT_DIM = {
    'resnet': 2048,
    'vit': 1024
}

N_JOINTS = 17
root = 'dataset'

class PATHS:
    # Raw data folders
    PARSED_DATA = f'{root}/parsed_data'
    AMASS_PTH = f'{root}/AMASS'
    THREEDPW_PTH = f'{root}/3DPW'
    HUMAN36M_PTH = f'{root}/Human36M'
    RICH_PTH = f'{root}/RICH'
    EMDB_PTH = f'{root}/EMDB'

    # Processed labels
    AMASS_LABEL = f'{root}/parsed_data/amass.pth'
    THREEDPW_LABEL = f'{root}/parsed_data/3dpw_dset_backbone.pth'
    MPII3D_LABEL = f'{root}/parsed_data/mpii3d_dset_backbone.pth'
    HUMAN36M_LABEL = f'{root}/parsed_data/human36m_dset_backbone.pth'
    INSTA_LABEL = f'{root}/parsed_data/insta_dset_backbone.pth'
    BEDLAM_LABEL = f'{root}/parsed_data/bedlam_train_backbone.pth'

class KEYPOINTS:
    NUM_JOINTS = N_JOINTS
    H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9]
    H36M_TO_J14 = H36M_TO_J17[:14]
    J17_TO_H36M = [14, 3, 4, 5, 2, 1, 0, 15, 12, 16, 13, 9, 10, 11, 8, 7, 6]
    COCO_AUG_DICT = f'{root}/body_models/coco_aug_dict.pth'
    TREE = [[5, 6], 0, 0, 1, 2, -1, -1, 5, 6, 7, 8, -1, -1, 11, 12, 13, 14, 15, 15, 15, 16, 16, 16]

    # STD scale for video noise
    S_BIAS = 1e-1
    S_JITTERING = 5e-2
    S_PEAK = 3e-1
    S_PEAK_MASK = 5e-3
    S_MASK = 0.03


class BMODEL:
    MAIN_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]  # reduced_joints

    FLDR = f'{root}/body_models/smpl/'
    SMPLX2SMPL = f'{root}/body_models/smplx2smpl.pkl'
    FACES = f'{root}/body_models/smpl_faces.npy'
    MEAN_PARAMS = f'{root}/body_models/smpl_mean_params.npz'
    JOINTS_REGRESSOR_WHAM = f'{root}/body_models/J_regressor_wham.npy'
    JOINTS_REGRESSOR_H36M = f'{root}/body_models/J_regressor_h36m.npy'
    JOINTS_REGRESSOR_EXTRA = f'{root}/body_models/J_regressor_extra.npy'
    JOINTS_REGRESSOR_FEET = f'{root}/body_models/J_regressor_feet.npy'
    PARENTS = torch.tensor([
        -1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 16, 17, 18, 19, 20, 21])
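Editor's note: the `KEYPOINTS` index lists are plain permutation tables; `H36M_TO_J17` reorders the 17 joints regressed with `J_regressor_h36m` into the J17 convention, and its first 14 entries give the commonly used 14-joint (LSP-style) subset. A small illustrative sketch (the joint array here is random data, not a real regressor output):

```python
import numpy as np
from configs.constants import KEYPOINTS

# Fake (17, 3) Human3.6M-style joints; in practice these would come from
# applying J_regressor_h36m to SMPL vertices.
h36m_joints = np.random.randn(17, 3)

j17 = h36m_joints[KEYPOINTS.H36M_TO_J17]   # reorder into the J17 convention
j14 = h36m_joints[KEYPOINTS.H36M_TO_J14]   # first 14 joints only
print(j17.shape, j14.shape)                # (17, 3) (14, 3)
```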
configs/yamls/demo.yaml
ADDED
@@ -0,0 +1,14 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'demo'
OUTPUT_DIR: 'experiments/'
NUM_WORKERS: 0
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True

TRAIN:
  STAGE: 'stage2'
  CHECKPOINT: 'checkpoints/wham_vit_bedlam_w_3dpw.pth.tar'

MODEL:
  BACKBONE: 'vit'
configs/yamls/model_base.yaml
ADDED
@@ -0,0 +1,7 @@
architecture: 'RNN'
in_dim: 49
n_iters: 1
pose_dr: 0.15
d_embed: 512
n_layers: 3
layer: 'LSTM'
configs/yamls/stage1.yaml
ADDED
@@ -0,0 +1,28 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'train_stage1'
OUTPUT_DIR: 'experiments/'
NUM_WORKERS: 8
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True
SEED_VALUE: 42

TRAIN:
  LR: 5e-4
  BATCH_SIZE: 64
  END_EPOCH: 100
  STAGE: 'stage1'
  CHECKPOINT: ''
  MILESTONES: [60, 80]

LOSS:
  SHAPE_LOSS_WEIGHT: 0.004
  JOINT3D_LOSS_WEIGHT: 0.4
  JOINT2D_LOSS_WEIGHT: 0.1
  POSE_LOSS_WEIGHT: 8.0
  CASCADED_LOSS_WEIGHT: 0.0
  SLIDING_LOSS_WEIGHT: 0.5
  CAMERA_LOSS_WEIGHT: 0.04
  ROOT_VEL_LOSS_WEIGHT: 0.001
  LOSS_WEIGHT: 50.0
  CAMERA_LOSS_SKIP_EPOCH: 5
configs/yamls/stage2.yaml
ADDED
@@ -0,0 +1,37 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'train_stage2'
OUTPUT_DIR: 'experiments'
NUM_WORKERS: 8
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True
SEED_VALUE: 42

TRAIN:
  LR: 1e-4
  LR_FINETUNE: 1e-5
  STAGE: 'stage2'
  CHECKPOINT: 'checkpoints/wham_stage1.pth.tar'
  BATCH_SIZE: 64
  END_EPOCH: 40
  MILESTONES: [20, 30]
  LR_DECAY_RATIO: 0.2

MODEL:
  BACKBONE: 'vit'

LOSS:
  SHAPE_LOSS_WEIGHT: 0.0
  JOINT2D_LOSS_WEIGHT: 3.0
  JOINT3D_LOSS_WEIGHT: 6.0
  POSE_LOSS_WEIGHT: 1.0
  CASCADED_LOSS_WEIGHT: 0.05
  SLIDING_LOSS_WEIGHT: 0.5
  CAMERA_LOSS_WEIGHT: 0.01
  ROOT_VEL_LOSS_WEIGHT: 0.001
  LOSS_WEIGHT: 60.0
  CAMERA_LOSS_SKIP_EPOCH: 0

DATASET:
  SEQLEN: 81
  RATIO: [0.2, 0.2, 0.2, 0.2, 0.2]
configs/yamls/stage2_b.yaml
ADDED
@@ -0,0 +1,38 @@
LOGDIR: ''
DEVICE: 'cuda'
EXP_NAME: 'train_stage2_b'
OUTPUT_DIR: 'experiments'
NUM_WORKERS: 8
MODEL_CONFIG: 'configs/yamls/model_base.yaml'
FLIP_EVAL: True
SEED_VALUE: 42

TRAIN:
  LR: 1e-4
  LR_FINETUNE: 1e-5
  STAGE: 'stage2'
  CHECKPOINT: 'checkpoints/wham_stage1.pth.tar'
  BATCH_SIZE: 64
  END_EPOCH: 80
  MILESTONES: [40, 50, 70]
  LR_DECAY_RATIO: 0.2

MODEL:
  BACKBONE: 'vit'

LOSS:
  SHAPE_LOSS_WEIGHT: 0.0
  JOINT2D_LOSS_WEIGHT: 5.0
  JOINT3D_LOSS_WEIGHT: 5.0
  VERTS3D_LOSS_WEIGHT: 1.0
  POSE_LOSS_WEIGHT: 3.0
  CASCADED_LOSS_WEIGHT: 0.05
  SLIDING_LOSS_WEIGHT: 0.5
  CAMERA_LOSS_WEIGHT: 0.01
  ROOT_VEL_LOSS_WEIGHT: 0.001
  LOSS_WEIGHT: 60.0
  CAMERA_LOSS_SKIP_EPOCH: 0

DATASET:
  SEQLEN: 81
  RATIO: [0.2, 0.2, 0.2, 0.2, 0.0, 0.2]
dataset/body_models/J_regressor_coco.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0cd49241810715e752aa7384363b7bc09fb96b386ca99aa1c3eb2c0d15d6b8b9
size 468648
dataset/body_models/J_regressor_feet.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ef9e6d64796f2f342983a9fde6a6d9f8e3544f1239e7f86aa4f6b7aa82f4cf6
size 220608
dataset/body_models/J_regressor_h36m.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c655cd7013d7829eb9acbebf0e43f952a3fa0305a53c35880e39192bfb6444a0
size 937168
dataset/body_models/J_regressor_wham.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f938dcfd5cd88d0b19ee34e442d49f1dc370d3d8c4f5aef57a93d0cf2e267c4c
size 854488
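Editor's note: the `J_regressor_*.npy` files above are linear joint regressors; joints are obtained as a linear combination of the 6890 SMPL mesh vertices. A sketch of how such a regressor is typically applied (the shapes follow the usual SMPL conventions and have not been verified against this repo; the vertex array here is a placeholder):

```python
import numpy as np

# A joint regressor is a (num_joints, 6890) matrix.
J_regressor = np.load('dataset/body_models/J_regressor_h36m.npy')   # e.g. (17, 6890)
verts = np.zeros((6890, 3))                                          # posed SMPL vertices (placeholder)
joints = J_regressor @ verts                                         # (num_joints, 3)
print(J_regressor.shape, joints.shape)
```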
dataset/body_models/smpl/SMPL_FEMALE.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a583c1b98e4afc19042641f1bae5cd8a1f712a6724886291a7627ec07acd408d
size 39056454
dataset/body_models/smpl/SMPL_MALE.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e8c0bbbbc635dcb166ed29c303fb4bef16ea5f623e5a89263495a9e403575bd
size 39056404
dataset/body_models/smpl/SMPL_NEUTRAL.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:98e65c74ad9b998783132f00880d1025a8d64b158e040e6ef13a557e5098bc42
size 39001280
dataset/body_models/smpl/__MACOSX/._smpl
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/._.DS_Store
ADDED
Binary file (120 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/.___init__.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/._models
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/._smpl_webuser
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/models/basicModel_f_lbs_10_207_0_v1.0.0.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a583c1b98e4afc19042641f1bae5cd8a1f712a6724886291a7627ec07acd408d
size 39056454
dataset/body_models/smpl/__MACOSX/smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e8c0bbbbc635dcb166ed29c303fb4bef16ea5f623e5a89263495a9e403575bd
size 39056404
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._LICENSE.txt
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._README.txt
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/.___init__.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._hello_world
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._lbs.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._posemapper.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._serialization.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/._verts.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._hello_smpl.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl/__MACOSX/smpl/smpl_webuser/hello_world/._render_smpl.py
ADDED
Binary file (239 Bytes)
dataset/body_models/smpl_faces.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:51fc11ebadb0487d74bef220c4eea43f014609249f0121413c1fc629d859fecb
size 165392
dataset/body_models/smpl_mean_params.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6fd6dd687800da946d0a0492383f973b92ec20f166a0b829775882868c35fcdd
size 1310
dataset/body_models/smplx2smpl.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c1d912d121ad98132e4492d8e7a0f1a8cf4412811e14a7ef8cb337bb48eef99e
size 578019251
demo.py
ADDED
@@ -0,0 +1,234 @@
import os
import argparse
import os.path as osp
from glob import glob
from collections import defaultdict

import cv2
import torch
import joblib
import numpy as np
from loguru import logger
from progress.bar import Bar

from configs.config import get_cfg_defaults
from lib.data.datasets import CustomDataset
from lib.utils.imutils import avg_preds
from lib.utils.transforms import matrix_to_axis_angle
from lib.models import build_network, build_body_model
from lib.models.preproc.detector import DetectionModel
from lib.models.preproc.extractor import FeatureExtractor
from lib.models.smplify import TemporalSMPLify

try:
    from lib.models.preproc.slam import SLAMModel
    _run_global = True
except:
    logger.info('DPVO is not properly installed. Only estimate in local coordinates !')
    _run_global = False

def run(cfg,
        video,
        output_pth,
        network,
        calib=None,
        run_global=True,
        save_pkl=False,
        visualize=False):

    cap = cv2.VideoCapture(video)
    assert cap.isOpened(), f'Failed to load video file {video}'
    fps = cap.get(cv2.CAP_PROP_FPS)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width, height = cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    # Whether or not estimating motion in global coordinates
    run_global = run_global and _run_global

    # Preprocess
    with torch.no_grad():
        if not (osp.exists(osp.join(output_pth, 'tracking_results.pth')) and
                osp.exists(osp.join(output_pth, 'slam_results.pth'))):

            detector = DetectionModel(cfg.DEVICE.lower())
            extractor = FeatureExtractor(cfg.DEVICE.lower(), cfg.FLIP_EVAL)

            if run_global: slam = SLAMModel(video, output_pth, width, height, calib)
            else: slam = None

            bar = Bar('Preprocess: 2D detection and SLAM', fill='#', max=length)
            while (cap.isOpened()):
                flag, img = cap.read()
                if not flag: break

                # 2D detection and tracking
                detector.track(img, fps, length)

                # SLAM
                if slam is not None:
                    slam.track()

                bar.next()

            tracking_results = detector.process(fps)

            if slam is not None:
                slam_results = slam.process()
            else:
                slam_results = np.zeros((length, 7))
                slam_results[:, 3] = 1.0    # Unit quaternion

            # Extract image features
            # TODO: Merge this into the previous while loop with an online bbox smoothing.
            tracking_results = extractor.run(video, tracking_results)
            logger.info('Complete Data preprocessing!')

            # Save the processed data
            joblib.dump(tracking_results, osp.join(output_pth, 'tracking_results.pth'))
            joblib.dump(slam_results, osp.join(output_pth, 'slam_results.pth'))
            logger.info(f'Save processed data at {output_pth}')

        # If the processed data already exists, load the processed data
        else:
            tracking_results = joblib.load(osp.join(output_pth, 'tracking_results.pth'))
            slam_results = joblib.load(osp.join(output_pth, 'slam_results.pth'))
            logger.info(f'Already processed data exists at {output_pth} ! Load the data .')

    # Build dataset
    dataset = CustomDataset(cfg, tracking_results, slam_results, width, height, fps)

    # run WHAM
    results = defaultdict(dict)

    n_subjs = len(dataset)
    for subj in range(n_subjs):

        with torch.no_grad():
            if cfg.FLIP_EVAL:
                # Forward pass with flipped input
                flipped_batch = dataset.load_data(subj, True)
                _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = flipped_batch
                flipped_pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)

                # Forward pass with normal input
                batch = dataset.load_data(subj)
                _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = batch
                pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)

                # Merge two predictions
                flipped_pose, flipped_shape = flipped_pred['pose'].squeeze(0), flipped_pred['betas'].squeeze(0)
                pose, shape = pred['pose'].squeeze(0), pred['betas'].squeeze(0)
                flipped_pose, pose = flipped_pose.reshape(-1, 24, 6), pose.reshape(-1, 24, 6)
                avg_pose, avg_shape = avg_preds(pose, shape, flipped_pose, flipped_shape)
                avg_pose = avg_pose.reshape(-1, 144)
                avg_contact = (flipped_pred['contact'][..., [2, 3, 0, 1]] + pred['contact']) / 2

                # Refine trajectory with merged prediction
                network.pred_pose = avg_pose.view_as(network.pred_pose)
                network.pred_shape = avg_shape.view_as(network.pred_shape)
                network.pred_contact = avg_contact.view_as(network.pred_contact)
                output = network.forward_smpl(**kwargs)
                pred = network.refine_trajectory(output, cam_angvel, return_y_up=True)

            else:
                # data
                batch = dataset.load_data(subj)
                _id, x, inits, features, mask, init_root, cam_angvel, frame_id, kwargs = batch

                # inference
                pred = network(x, inits, features, mask=mask, init_root=init_root, cam_angvel=cam_angvel, return_y_up=True, **kwargs)

        # if False:
        if args.run_smplify:
            smplify = TemporalSMPLify(smpl, img_w=width, img_h=height, device=cfg.DEVICE)
            input_keypoints = dataset.tracking_results[_id]['keypoints']
            pred = smplify.fit(pred, input_keypoints, **kwargs)

            with torch.no_grad():
                network.pred_pose = pred['pose']
                network.pred_shape = pred['betas']
                network.pred_cam = pred['cam']
                output = network.forward_smpl(**kwargs)
                pred = network.refine_trajectory(output, cam_angvel, return_y_up=True)

        # ========= Store results ========= #
        pred_body_pose = matrix_to_axis_angle(pred['poses_body']).cpu().numpy().reshape(-1, 69)
        pred_root = matrix_to_axis_angle(pred['poses_root_cam']).cpu().numpy().reshape(-1, 3)
        pred_root_world = matrix_to_axis_angle(pred['poses_root_world']).cpu().numpy().reshape(-1, 3)
        pred_pose = np.concatenate((pred_root, pred_body_pose), axis=-1)
        pred_pose_world = np.concatenate((pred_root_world, pred_body_pose), axis=-1)
        pred_trans = (pred['trans_cam'] - network.output.offset).cpu().numpy()

        results[_id]['pose'] = pred_pose
        results[_id]['trans'] = pred_trans
        results[_id]['pose_world'] = pred_pose_world
        results[_id]['trans_world'] = pred['trans_world'].cpu().squeeze(0).numpy()
        results[_id]['betas'] = pred['betas'].cpu().squeeze(0).numpy()
        results[_id]['verts'] = (pred['verts_cam'] + pred['trans_cam'].unsqueeze(1)).cpu().numpy()
        results[_id]['frame_ids'] = frame_id

    if save_pkl:
        joblib.dump(results, osp.join(output_pth, "wham_output.pkl"))

    # Visualize
    if visualize:
        from lib.vis.run_vis import run_vis_on_demo
        with torch.no_grad():
            run_vis_on_demo(cfg, video, results, output_pth, network.smpl, vis_global=run_global)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('--video', type=str,
                        default='examples/demo_video.mp4',
                        help='input video path or youtube link')

    parser.add_argument('--output_pth', type=str, default='output/demo',
                        help='output folder to write results')

    parser.add_argument('--calib', type=str, default=None,
                        help='Camera calibration file path')

    parser.add_argument('--estimate_local_only', action='store_true',
                        help='Only estimate motion in camera coordinate if True')

    parser.add_argument('--visualize', action='store_true',
                        help='Visualize the output mesh if True')

    parser.add_argument('--save_pkl', action='store_true',
                        help='Save output as pkl file')

    parser.add_argument('--run_smplify', action='store_true',
                        help='Run Temporal SMPLify for post processing')

    args = parser.parse_args()

    cfg = get_cfg_defaults()
    cfg.merge_from_file('configs/yamls/demo.yaml')

    logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
    logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')

    # ========= Load WHAM ========= #
    smpl_batch_size = cfg.TRAIN.BATCH_SIZE * cfg.DATASET.SEQLEN
    smpl = build_body_model(cfg.DEVICE, smpl_batch_size)
    network = build_network(cfg, smpl)
    network.eval()

    # Output folder
    sequence = '.'.join(args.video.split('/')[-1].split('.')[:-1])
    output_pth = osp.join(args.output_pth, sequence)
    os.makedirs(output_pth, exist_ok=True)

    run(cfg,
        args.video,
        output_pth,
        network,
        args.calib,
        run_global=not args.estimate_local_only,
        save_pkl=args.save_pkl,
        visualize=args.visualize)

    print()
    logger.info('Done !')
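Editor's note: when demo.py is run with `--save_pkl`, the per-subject dictionary built above is written with joblib to `<output_pth>/wham_output.pkl`. A minimal sketch for loading it back and inspecting one subject (the path below assumes the default `--output_pth` and an input video named `IMG_9732.mov`):

```python
import joblib

# Path layout follows demo.py: output/demo/<video name without extension>/wham_output.pkl
results = joblib.load('output/demo/IMG_9732/wham_output.pkl')

for subj_id, subj in results.items():
    print(subj_id,
          subj['pose'].shape,         # (T, 72) axis-angle SMPL pose, camera coordinates
          subj['pose_world'].shape,   # (T, 72) axis-angle SMPL pose, world coordinates
          subj['trans'].shape,        # (T, 3) camera-frame translation
          subj['betas'].shape,        # SMPL shape parameters
          subj['verts'].shape)        # (T, 6890, 3) posed vertices, camera coordinates
```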
docs/API.md
ADDED
@@ -0,0 +1,18 @@
## Python API

To use the Python API of WHAM, please finish the basic installation first ([Installation](INSTALL.md) or [Docker](DOCKER.md)).

If you use the Docker environment, please run:

```bash
cd /path/to/WHAM
docker run -it -v .:/code/ --rm yusun9/wham-vitpose-dpvo-cuda11.3-python3.9 python
```

Then you can run WHAM from Python like this:
```python
from wham_api import WHAM_API
wham_model = WHAM_API()
input_video_path = 'examples/IMG_9732.mov'
results, tracking_results, slam_results = wham_model(input_video_path)
```
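Editor's note: the `results` object returned above appears to mirror the per-subject dictionary that demo.py builds (keys such as 'pose', 'trans', 'pose_world', 'trans_world', 'betas', 'verts', and 'frame_ids'), assuming the API wraps the same pipeline. A hypothetical follow-up to the snippet above:

```python
# Assumes `results` from the API call has the same structure as demo.py's output dict.
for subj_id, subj in results.items():
    print(f"subject {subj_id}: {len(subj['frame_ids'])} frames, pose {subj['pose'].shape}")
```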
docs/DATASET.md
ADDED
@@ -0,0 +1,42 @@
# Dataset

## Training Data
We use the [AMASS](https://amass.is.tue.mpg.de/), [InstaVariety](https://github.com/akanazawa/human_dynamics/blob/master/doc/insta_variety.md), [MPI-INF-3DHP](https://vcai.mpi-inf.mpg.de/3dhp-dataset/), [Human3.6M](http://vision.imar.ro/human3.6m/description.php), and [3DPW](https://virtualhumans.mpi-inf.mpg.de/3DPW/) datasets for training. Please register on their websites to download and process the data. You can download the parsed ViT versions of the InstaVariety, MPI-INF-3DHP, Human3.6M, and 3DPW data from [Google Drive](https://drive.google.com/drive/folders/13T2ghVvrw_fEk3X-8L0e6DVSYx_Og8o3?usp=sharing) and save them under the `dataset/parsed_data` folder.

### Process AMASS dataset
After downloading the AMASS dataset, you can process it by running:
```bash
python -m lib.data_utils.amass_utils
```
The processed data will be stored at `dataset/parsed_data/amass.pth`.

### Process 3DPW, MPII3D, Human3.6M, and InstaVariety datasets
First, visit [TCMR](https://github.com/hongsukchoi/TCMR_RELEASE) and download the preprocessed data to `dataset/parsed_data/TCMR_preproc/`.

Next, prepare 2D keypoint detections using [ViTPose](https://github.com/ViTAE-Transformer/ViTPose) and store the results at `dataset/detection_results/<DATASET-NAME>/<SEQUENCE_NAME>.npy`. You may need to download all images to prepare the detection results.

For the Human3.6M, MPII3D, and InstaVariety datasets, you also need to download the [NeuralAnnot](https://github.com/mks0601/NeuralAnnot_RELEASE) pseudo ground-truth SMPL labels. As mentioned in our paper, we do not supervise WHAM on these labels, but use them for the neural initialization step.

Finally, run the following commands to preprocess all training data.
```bash
python -m lib.data_utils.threedpw_train_utils   # 3DPW dataset
# [Coming] python -m lib.data_utils.human36m_train_utils   # Human3.6M dataset
# [Coming] python -m lib.data_utils.mpii3d_train_utils     # MPI-INF-3DHP dataset
# [Coming] python -m lib.data_utils.insta_train_utils      # InstaVariety dataset
```

### Process BEDLAM dataset
Will be updated.

## Evaluation Data
We use [3DPW](https://virtualhumans.mpi-inf.mpg.de/3DPW/), [RICH](https://rich.is.tue.mpg.de/), and [EMDB](https://eth-ait.github.io/emdb/) for evaluation. We provide the parsed data for evaluation. Please download the data from [Google Drive](https://drive.google.com/drive/folders/13T2ghVvrw_fEk3X-8L0e6DVSYx_Og8o3?usp=sharing) and place it at `dataset/parsed_data/`.

To process the data at your end, please
1) Download the parsed 3DPW data from [TCMR](https://github.com/hongsukchoi/TCMR_RELEASE) and store it at `dataset/parsed_data/TCMR_preproc/`.
2) Run [ViTPose](https://github.com/ViTAE-Transformer/ViTPose) on all test data and store the results at `dataset/detection_results/<DATASET-NAME>`.
3) Run the following commands.
```bash
python -m lib.data_utils.threedpw_eval_utils --split <"val" or "test">   # 3DPW dataset
python -m lib.data_utils.emdb_eval_utils --split <"1" or "2">            # EMDB dataset
python -m lib.data_utils.rich_eval_utils                                 # RICH dataset
```
docs/DOCKER.md
ADDED
@@ -0,0 +1,23 @@
## Installation

### Prerequisites
1. Please make sure you have properly installed [Docker](https://www.docker.com/) and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) before installation.

2. Please prepare the essential data for inference:
To download the SMPL body models (Neutral, Female, and Male), you need to register for [SMPL](https://smpl.is.tue.mpg.de/) and [SMPLify](https://smplify.is.tue.mpg.de/). The username and password for both homepages will be used while fetching the demo data.
Next, run the following script to fetch the demo data. This script will download all the required dependencies, including trained models and demo videos.
```bash
bash fetch_demo_data.sh
```

### Usage
1. Pull the docker image from Docker Hub:
```bash
docker pull yusun9/wham-vitpose-dpvo-cuda11.3-python3.9:latest
```

2. Run the code in the docker environment:
```bash
cd /path/to/WHAM
docker run -v .:/code/ --rm yusun9/wham-vitpose-dpvo-cuda11.3-python3.9 python demo.py --video examples/IMG_9732.mov
```
docs/INSTALL.md
ADDED
@@ -0,0 +1,38 @@
# Installation

WHAM has been implemented and tested on Ubuntu 20.04 and 22.04 with Python 3.9. We provide an [anaconda](https://www.anaconda.com/) environment to run WHAM as below.

```bash
# Clone the repo
git clone https://github.com/yohanshin/WHAM.git --recursive
cd WHAM/

# Create Conda environment
conda create -n wham python=3.9
conda activate wham

# Install PyTorch libraries
conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch

# Install PyTorch3D (optional) for visualization
conda install -c fvcore -c iopath -c conda-forge fvcore iopath
pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py39_cu113_pyt1110/download.html

# Install WHAM dependencies
pip install -r requirements.txt

# Install ViTPose
pip install -v -e third-party/ViTPose

# Install DPVO
cd third-party/DPVO
wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
unzip eigen-3.4.0.zip -d thirdparty && rm -rf eigen-3.4.0.zip
conda install pytorch-scatter=2.0.9 -c rusty1s
conda install cudatoolkit-dev=11.3.1 -c conda-forge

# ONLY IF your GCC version is greater than 10
conda install -c conda-forge gxx=9.5

pip install .
```
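Editor's note: a quick, optional sanity check after installation, written as a minimal sketch; it only confirms that the core dependencies import and that CUDA is visible, not that WHAM itself runs end to end.

```python
import torch

print('torch', torch.__version__)              # expected 1.11.0 for this setup
print('CUDA available:', torch.cuda.is_available())

# demo.py also imports these packages, so they should resolve after `pip install -r requirements.txt`.
import cv2
import joblib
import numpy as np
from yacs.config import CfgNode

print('OpenCV', cv2.__version__)
print('NumPy', np.__version__)
```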
examples/IMG_9730.mov
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3739b87ba0c64d047df3d8f5479c530377788fdab4c2283925477894a1d252f9
size 21526220
examples/IMG_9731.mov
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:116ad3f95743524283a234fd9e7a1152b28a04536ab5975f4e4e71c547d9e1a6
size 22633328
examples/IMG_9732.mov
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:168773c92e0112361dcd1da4154c915983490e58ff89102c1a65edb28d505813
size 23960355
examples/drone_calib.txt
ADDED
@@ -0,0 +1 @@
1321.0 1321.0 960.0 540.0