Spaces: Build error

Commit 523c2b9 · Duplicate from YoonaAI/yoonaAvatarSpace

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .gitignore +18 -0
- assets/garment_teaser.png +3 -0
- assets/intermediate_results.png +3 -0
- assets/teaser.gif +3 -0
- assets/thumbnail.png +3 -0
- examples/22097467bffc92d4a5c4246f7d4edb75.png +3 -0
- examples/44c0f84c957b6b9bdf77662af5bb7078.png +3 -0
- examples/5a6a25963db2f667441d5076972c207c.png +3 -0
- examples/8da7ceb94669c2f65cbd28022e1f9876.png +3 -0
- examples/923d65f767c85a42212cae13fba3750b.png +3 -0
- examples/c9856a2bc31846d684cbb965457fad59.png +3 -0
- examples/e1e7622af7074a022f5d96dc16672517.png +3 -0
- examples/fb9d20fdb93750584390599478ecf86e.png +3 -0
- .gitattributes +37 -0
- README.md +14 -0
- app.py +144 -0
- apps/ICON.py +735 -0
- apps/Normal.py +220 -0
- apps/infer.py +492 -0
- configs/icon-filter.yaml +25 -0
- configs/icon-nofilter.yaml +25 -0
- configs/pamir.yaml +24 -0
- configs/pifu.yaml +24 -0
- lib/pymaf/configs/pymaf_config.yaml +47 -0
- lib/pymaf/core/__init__.py +0 -0
- lib/pymaf/core/train_options.py +135 -0
- lib/pymaf/core/base_trainer.py +107 -0
- lib/pymaf/core/cfgs.py +100 -0
- lib/pymaf/core/constants.py +153 -0
- lib/pymaf/core/fits_dict.py +133 -0
- lib/pymaf/core/path_config.py +24 -0
- lib/pymaf/models/__init__.py +3 -0
- lib/pymaf/models/pymaf_net.py +362 -0
- lib/pymaf/models/smpl.py +92 -0
- lib/pymaf/models/hmr.py +303 -0
- lib/pymaf/models/maf_extractor.py +135 -0
- lib/pymaf/models/res_module.py +385 -0
- lib/pymaf/utils/__init__.py +0 -0
- lib/pymaf/utils/geometry.py +435 -0
- lib/pymaf/utils/imutils.py +491 -0
- lib/pymaf/utils/streamer.py +142 -0
- lib/pymaf/utils/transforms.py +78 -0
- lib/renderer/__init__.py +0 -0
- lib/renderer/camera.py +226 -0
- lib/renderer/gl/__init__.py +0 -0
- lib/renderer/gl/data/color.fs +20 -0
- lib/renderer/gl/data/color.vs +29 -0
- lib/renderer/gl/data/normal.fs +12 -0
- lib/renderer/gl/data/normal.vs +15 -0
- lib/renderer/gl/data/prt.fs +157 -0
.gitignore
ADDED
@@ -0,0 +1,18 @@
+data/*/*
+data/thuman*
+!data/tbfo.ttf
+__pycache__
+debug/
+log/
+.vscode
+!.gitignore
+force_push.sh
+.idea
+human_det/
+kaolin/
+neural_voxelization_layer/
+pytorch3d/
+force_push.sh
+results/
+gradio_cached_examples/
+gradio_queue.db
assets/garment_teaser.png
ADDED
(binary file, stored with Git LFS)
assets/intermediate_results.png
ADDED
(binary file, stored with Git LFS)
assets/teaser.gif
ADDED
(binary file, stored with Git LFS)
assets/thumbnail.png
ADDED
(binary file, stored with Git LFS)
examples/22097467bffc92d4a5c4246f7d4edb75.png
ADDED
(binary file, stored with Git LFS)
examples/44c0f84c957b6b9bdf77662af5bb7078.png
ADDED
(binary file, stored with Git LFS)
examples/5a6a25963db2f667441d5076972c207c.png
ADDED
(binary file, stored with Git LFS)
examples/8da7ceb94669c2f65cbd28022e1f9876.png
ADDED
(binary file, stored with Git LFS)
examples/923d65f767c85a42212cae13fba3750b.png
ADDED
(binary file, stored with Git LFS)
examples/c9856a2bc31846d684cbb965457fad59.png
ADDED
(binary file, stored with Git LFS)
examples/e1e7622af7074a022f5d96dc16672517.png
ADDED
(binary file, stored with Git LFS)
examples/fb9d20fdb93750584390599478ecf86e.png
ADDED
(binary file, stored with Git LFS)
.gitattributes
ADDED
@@ -0,0 +1,37 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zstandard filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.obj filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.glb filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,14 @@
+---
+title: YoonaAvatar
+sdk: gradio
+emoji: 🔥
+colorFrom: red
+colorTo: purple
+sdk_version: 3.2
+app_file: app.py
+pinned: false
+python_version: 3.8.13
+duplicated_from: YoonaAI/yoonaAvatarSpace
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,144 @@
+# install
+
+
+import glob
+import gradio as gr
+import os
+import numpy as np
+
+import subprocess
+
+if os.getenv('SYSTEM') == 'spaces':
+    subprocess.run('pip install pyembree'.split())
+    subprocess.run(
+        'pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html'.split())
+    subprocess.run(
+        'pip install https://download.is.tue.mpg.de/icon/HF/kaolin-0.11.0-cp38-cp38-linux_x86_64.whl'.split())
+    subprocess.run(
+        'pip install https://download.is.tue.mpg.de/icon/HF/pytorch3d-0.7.0-cp38-cp38-linux_x86_64.whl'.split())
+    subprocess.run(
+        'pip install git+https://github.com/YuliangXiu/neural_voxelization_layer.git'.split())
+
+from apps.infer import generate_model
+
+# running
+
+description = '''
+# ICON Clothed Human Digitization
+### ICON: Implicit Clothed humans Obtained from Normals (CVPR 2022)
+<table>
+<th>
+<ul>
+<li><strong>Homepage</strong> <a href="http://icon.is.tue.mpg.de">icon.is.tue.mpg.de</a></li>
+<li><strong>Code</strong> <a href="https://github.com/YuliangXiu/ICON">YuliangXiu/ICON</a></li>
+<li><strong>Paper</strong> <a href="https://arxiv.org/abs/2112.09127">arXiv</a>, <a href="https://readpaper.com/paper/4569785684533977089">ReadPaper</a></li>
+<li><strong>Chatroom</strong> <a href="https://discord.gg/Vqa7KBGRyk">Discord</a></li>
+<li><strong>Colab Notebook</strong> <a href="https://colab.research.google.com/drive/1-AWeWhPvCTBX0KfMtgtMk10uPU05ihoA?usp=sharing">Google Colab</a></li>
+</ul>
+<a href="https://twitter.com/yuliangxiu"><img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/yuliangxiu?style=social"></a>
+<iframe src="https://ghbtns.com/github-btn.html?user=yuliangxiu&repo=ICON&type=star&count=true&v=2&size=small" frameborder="0" scrolling="0" width="100" height="20"></iframe>
+<a href="https://youtu.be/hZd6AYin2DE"><img alt="YouTube Video Views" src="https://img.shields.io/youtube/views/hZd6AYin2DE?style=social"></a>
+</th>
+<th>
+<iframe width="560" height="315" src="https://www.youtube.com/embed/hZd6AYin2DE" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
+</th>
+</table>
+<h4> The reconstruction + refinement + video takes about 200 seconds for a single image. <span style="color:red"> If ERROR, try "Submit Image" again.</span></h4>
+<details>
+<summary>More</summary>
+#### Citation
+```
+@inproceedings{xiu2022icon,
+  title     = {{ICON}: {I}mplicit {C}lothed humans {O}btained from {N}ormals},
+  author    = {Xiu, Yuliang and Yang, Jinlong and Tzionas, Dimitrios and Black, Michael J.},
+  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  month     = {June},
+  year      = {2022},
+  pages     = {13296-13306}
+}
+```
+#### Acknowledgments:
+- [StyleGAN-Human, ECCV 2022](https://stylegan-human.github.io/)
+- [nagolinc/styleGanHuman_and_PIFu](https://huggingface.co/spaces/nagolinc/styleGanHuman_and_PIFu)
+- [radames/PIFu-Clothed-Human-Digitization](https://huggingface.co/spaces/radames/PIFu-Clothed-Human-Digitization)
+#### Image Credits
+* [Pinterest](https://www.pinterest.com/search/pins/?q=parkour&rs=sitelinks_searchbox)
+#### Related works
+* [ICON @ MPI](https://icon.is.tue.mpg.de/)
+* [MonoPort @ USC](https://xiuyuliang.cn/monoport)
+* [Phorhum @ Google](https://phorhum.github.io/)
+* [PIFuHD @ Meta](https://shunsukesaito.github.io/PIFuHD/)
+* [PaMIR @ Tsinghua](http://www.liuyebin.com/pamir/pamir.html)
+</details>
+'''
+
+
+def generate_image(seed, psi):
+    iface = gr.Interface.load("spaces/hysts/StyleGAN-Human")
+    img = iface(seed, psi)
+    return img
+
+
+model_types = ['ICON', 'PIFu', 'PaMIR']
+examples_names = glob.glob('examples/*.png')
+examples_types = np.random.choice(
+    model_types, len(examples_names), p=[0.6, 0.2, 0.2])
+
+examples = [list(item) for item in zip(examples_names, examples_types)]
+
+with gr.Blocks() as demo:
+    gr.Markdown(description)
+
+    out_lst = []
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                with gr.Column():
+                    seed = gr.inputs.Slider(
+                        0, 1000, step=1, default=0, label='Seed (For Image Generation)')
+                    psi = gr.inputs.Slider(
+                        0, 2, step=0.05, default=0.7, label='Truncation psi (For Image Generation)')
+                    radio_choice = gr.Radio(
+                        model_types, label='Method (For Reconstruction)', value='icon-filter')
+                    inp = gr.Image(type="filepath", label="Input Image")
+                    with gr.Row():
+                        btn_sample = gr.Button("Generate Image")
+                        btn_submit = gr.Button("Submit Image")
+
+            gr.Examples(examples=examples,
+                        inputs=[inp, radio_choice],
+                        cache_examples=False,
+                        fn=generate_model,
+                        outputs=out_lst)
+
+            out_vid = gr.Video(
+                label="Image + Normal + SMPL Body + Clothed Human")
+            out_vid_download = gr.File(
+                label="Download Video, welcome to share on Twitter with #ICON")
+
+        with gr.Column():
+            overlap_inp = gr.Image(
+                type="filepath", label="Image Normal Overlap")
+            out_final = gr.Model3D(
+                clear_color=[0.0, 0.0, 0.0, 0.0], label="Clothed human")
+            out_final_download = gr.File(
+                label="Download clothed human mesh")
+            out_smpl = gr.Model3D(
+                clear_color=[0.0, 0.0, 0.0, 0.0], label="SMPL body")
+            out_smpl_download = gr.File(label="Download SMPL body mesh")
+            out_smpl_npy_download = gr.File(label="Download SMPL params")
+
+    out_lst = [out_smpl, out_smpl_download, out_smpl_npy_download,
+               out_final, out_final_download, out_vid, out_vid_download, overlap_inp]
+
+    btn_submit.click(fn=generate_model, inputs=[
+                     inp, radio_choice], outputs=out_lst)
+    btn_sample.click(fn=generate_image, inputs=[seed, psi], outputs=inp)
+
+if __name__ == "__main__":
+
+    # demo.launch(debug=False, enable_queue=False,
+    #             auth=(os.environ['USER'], os.environ['PASSWORD']),
+    #             auth_message="Register at icon.is.tue.mpg.de to get HuggingFace username and password.")
+
+    demo.launch(debug=True, enable_queue=True)
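For orientation, here is a minimal sketch (not part of this commit) of how the `generate_model` callback wired above could be called outside the Gradio UI. The example path is one of the bundled images, and the unpacking assumes the eight return values follow the `out_lst` ordering defined in app.py:

```python
# Hypothetical standalone call, assuming the Space's dependencies and
# model checkpoints are available locally (see the pip installs above).
from apps.infer import generate_model

results = generate_model("examples/22097467bffc92d4a5c4246f7d4edb75.png", "ICON")

# Assumed to mirror out_lst: SMPL mesh + files, clothed mesh + file, video + file, overlap image.
(smpl_mesh, smpl_mesh_file, smpl_npy_file,
 clothed_mesh, clothed_mesh_file,
 video, video_file, overlap_image) = results
```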
apps/ICON.py
ADDED
@@ -0,0 +1,735 @@
+# -*- coding: utf-8 -*-
+
+# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
+# holder of all proprietary rights on this computer program.
+# You can only use this computer program if you have closed
+# a license agreement with MPG or you get the right to use the computer
+# program from someone who is authorized to grant you that right.
+# Any use of the computer program without a valid license is prohibited and
+# liable to prosecution.
+#
+# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
+# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
+# for Intelligent Systems. All rights reserved.
+#
+# Contact: [email protected]
+
+from lib.common.seg3d_lossless import Seg3dLossless
+from lib.dataset.Evaluator import Evaluator
+from lib.net import HGPIFuNet
+from lib.common.train_util import *
+from lib.common.render import Render
+from lib.dataset.mesh_util import SMPLX, update_mesh_shape_prior_losses, get_visibility
+import torch
+import lib.smplx as smplx
+import numpy as np
+from torch import nn
+from skimage.transform import resize
+import pytorch_lightning as pl
+
+torch.backends.cudnn.benchmark = True
+
+
+class ICON(pl.LightningModule):
+
+    def __init__(self, cfg):
+        super(ICON, self).__init__()
+
+        self.cfg = cfg
+        self.batch_size = self.cfg.batch_size
+        self.lr_G = self.cfg.lr_G
+
+        self.use_sdf = cfg.sdf
+        self.prior_type = cfg.net.prior_type
+        self.mcube_res = cfg.mcube_res
+        self.clean_mesh_flag = cfg.clean_mesh
+
+        self.netG = HGPIFuNet(
+            self.cfg,
+            self.cfg.projection_mode,
+            error_term=nn.SmoothL1Loss() if self.use_sdf else nn.MSELoss(),
+        )
+
+        self.evaluator = Evaluator(
+            device=torch.device(f"cuda:{self.cfg.gpus[0]}"))
+
+        self.resolutions = (np.logspace(
+            start=5,
+            stop=np.log2(self.mcube_res),
+            base=2,
+            num=int(np.log2(self.mcube_res) - 4),
+            endpoint=True,
+        ) + 1.0)
+        self.resolutions = self.resolutions.astype(np.int16).tolist()
+
+        self.base_keys = ["smpl_verts", "smpl_faces"]
+        self.feat_names = self.cfg.net.smpl_feats
+
+        self.icon_keys = self.base_keys + [
+            f"smpl_{feat_name}" for feat_name in self.feat_names
+        ]
+        self.keypoint_keys = self.base_keys + [
+            f"smpl_{feat_name}" for feat_name in self.feat_names
+        ]
+        self.pamir_keys = [
+            "voxel_verts", "voxel_faces", "pad_v_num", "pad_f_num"
+        ]
+        self.pifu_keys = []
+
+        self.reconEngine = Seg3dLossless(
+            query_func=query_func,
+            b_min=[[-1.0, 1.0, -1.0]],
+            b_max=[[1.0, -1.0, 1.0]],
+            resolutions=self.resolutions,
+            align_corners=True,
+            balance_value=0.50,
+            device=torch.device(f"cuda:{self.cfg.test_gpus[0]}"),
+            visualize=False,
+            debug=False,
+            use_cuda_impl=False,
+            faster=True,
+        )
+
+        self.render = Render(
+            size=512, device=torch.device(f"cuda:{self.cfg.test_gpus[0]}"))
+        self.smpl_data = SMPLX()
+
+        self.get_smpl_model = lambda smpl_type, gender, age, v_template: smplx.create(
+            self.smpl_data.model_dir,
+            kid_template_path=osp.join(
+                osp.realpath(self.smpl_data.model_dir),
+                f"{smpl_type}/{smpl_type}_kid_template.npy",
+            ),
+            model_type=smpl_type,
+            gender=gender,
+            age=age,
+            v_template=v_template,
+            use_face_contour=False,
+            ext="pkl",
+        )
+
+        self.in_geo = [item[0] for item in cfg.net.in_geo]
+        self.in_nml = [item[0] for item in cfg.net.in_nml]
+        self.in_geo_dim = [item[1] for item in cfg.net.in_geo]
+        self.in_total = self.in_geo + self.in_nml
+        self.smpl_dim = cfg.net.smpl_dim
+
+        self.export_dir = None
+        self.result_eval = {}
+
+    def get_progress_bar_dict(self):
+        tqdm_dict = super().get_progress_bar_dict()
+        if "v_num" in tqdm_dict:
+            del tqdm_dict["v_num"]
+        return tqdm_dict
+
+    # Training related
+    def configure_optimizers(self):
+
+        # set optimizer
+        weight_decay = self.cfg.weight_decay
+        momentum = self.cfg.momentum
+
+        optim_params_G = [{
+            "params": self.netG.if_regressor.parameters(),
+            "lr": self.lr_G
+        }]
+
+        if self.cfg.net.use_filter:
+            optim_params_G.append({
+                "params": self.netG.F_filter.parameters(),
+                "lr": self.lr_G
+            })
+
+        if self.cfg.net.prior_type == "pamir":
+            optim_params_G.append({
+                "params": self.netG.ve.parameters(),
+                "lr": self.lr_G
+            })
+
+        if self.cfg.optim == "Adadelta":
+
+            optimizer_G = torch.optim.Adadelta(optim_params_G,
+                                               lr=self.lr_G,
+                                               weight_decay=weight_decay)
+
+        elif self.cfg.optim == "Adam":
+
+            optimizer_G = torch.optim.Adam(optim_params_G,
+                                           lr=self.lr_G,
+                                           weight_decay=weight_decay)
+
+        elif self.cfg.optim == "RMSprop":
+
+            optimizer_G = torch.optim.RMSprop(
+                optim_params_G,
+                lr=self.lr_G,
+                weight_decay=weight_decay,
+                momentum=momentum,
+            )
+
+        else:
+            raise NotImplementedError
+
+        # set scheduler
+        scheduler_G = torch.optim.lr_scheduler.MultiStepLR(
+            optimizer_G, milestones=self.cfg.schedule, gamma=self.cfg.gamma)
+
+        return [optimizer_G], [scheduler_G]
+
+    def training_step(self, batch, batch_idx):
+
+        if not self.cfg.fast_dev:
+            export_cfg(self.logger, self.cfg)
+
+        self.netG.train()
+
+        in_tensor_dict = {
+            "sample": batch["samples_geo"].permute(0, 2, 1),
+            "calib": batch["calib"],
+            "label": batch["labels_geo"].unsqueeze(1),
+        }
+
+        for name in self.in_total:
+            in_tensor_dict.update({name: batch[name]})
+
+        in_tensor_dict.update({
+            k: batch[k] if k in batch.keys() else None
+            for k in getattr(self, f"{self.prior_type}_keys")
+        })
+
+        preds_G, error_G = self.netG(in_tensor_dict)
+
+        acc, iou, prec, recall = self.evaluator.calc_acc(
+            preds_G.flatten(),
+            in_tensor_dict["label"].flatten(),
+            0.5,
+            use_sdf=self.cfg.sdf,
+        )
+
+        # metrics processing
+        metrics_log = {
+            "train_loss": error_G.item(),
+            "train_acc": acc.item(),
+            "train_iou": iou.item(),
+            "train_prec": prec.item(),
+            "train_recall": recall.item(),
+        }
+
+        tf_log = tf_log_convert(metrics_log)
+        bar_log = bar_log_convert(metrics_log)
+
+        if batch_idx % int(self.cfg.freq_show_train) == 0:
+
+            with torch.no_grad():
+                self.render_func(in_tensor_dict, dataset="train")
+
+        metrics_return = {
+            k.replace("train_", ""): torch.tensor(v)
+            for k, v in metrics_log.items()
+        }
+
+        metrics_return.update({
+            "loss": error_G,
+            "log": tf_log,
+            "progress_bar": bar_log
+        })
+
+        return metrics_return
+
+    def training_epoch_end(self, outputs):
+
+        if [] in outputs:
+            outputs = outputs[0]
+
+        # metrics processing
+        metrics_log = {
+            "train_avgloss": batch_mean(outputs, "loss"),
+            "train_avgiou": batch_mean(outputs, "iou"),
+            "train_avgprec": batch_mean(outputs, "prec"),
+            "train_avgrecall": batch_mean(outputs, "recall"),
+            "train_avgacc": batch_mean(outputs, "acc"),
+        }
+
+        tf_log = tf_log_convert(metrics_log)
+
+        return {"log": tf_log}
+
+    def validation_step(self, batch, batch_idx):
+
+        self.netG.eval()
+        self.netG.training = False
+
+        in_tensor_dict = {
+            "sample": batch["samples_geo"].permute(0, 2, 1),
+            "calib": batch["calib"],
+            "label": batch["labels_geo"].unsqueeze(1),
+        }
+
+        for name in self.in_total:
+            in_tensor_dict.update({name: batch[name]})
+
+        in_tensor_dict.update({
+            k: batch[k] if k in batch.keys() else None
+            for k in getattr(self, f"{self.prior_type}_keys")
+        })
+
+        preds_G, error_G = self.netG(in_tensor_dict)
+
+        acc, iou, prec, recall = self.evaluator.calc_acc(
+            preds_G.flatten(),
+            in_tensor_dict["label"].flatten(),
+            0.5,
+            use_sdf=self.cfg.sdf,
+        )
+
+        if batch_idx % int(self.cfg.freq_show_val) == 0:
+            with torch.no_grad():
+                self.render_func(in_tensor_dict, dataset="val", idx=batch_idx)
+
+        metrics_return = {
+            "val_loss": error_G,
+            "val_acc": acc,
+            "val_iou": iou,
+            "val_prec": prec,
+            "val_recall": recall,
+        }
+
+        return metrics_return
+
+    def validation_epoch_end(self, outputs):
+
+        # metrics processing
+        metrics_log = {
+            "val_avgloss": batch_mean(outputs, "val_loss"),
+            "val_avgacc": batch_mean(outputs, "val_acc"),
+            "val_avgiou": batch_mean(outputs, "val_iou"),
+            "val_avgprec": batch_mean(outputs, "val_prec"),
+            "val_avgrecall": batch_mean(outputs, "val_recall"),
+        }
+
+        tf_log = tf_log_convert(metrics_log)
+
+        return {"log": tf_log}
+
+    def compute_vis_cmap(self, smpl_type, smpl_verts, smpl_faces):
+
+        (xy, z) = torch.as_tensor(smpl_verts).split([2, 1], dim=1)
+        smpl_vis = get_visibility(xy, -z, torch.as_tensor(smpl_faces).long())
+        smpl_cmap = self.smpl_data.cmap_smpl_vids(smpl_type)
+
+        return {
+            "smpl_vis": smpl_vis.unsqueeze(0).to(self.device),
+            "smpl_cmap": smpl_cmap.unsqueeze(0).to(self.device),
+            "smpl_verts": smpl_verts.unsqueeze(0),
+        }
+
+    @torch.enable_grad()
+    def optim_body(self, in_tensor_dict, batch):
+
+        smpl_model = self.get_smpl_model(batch["type"][0], batch["gender"][0],
+                                         batch["age"][0], None).to(self.device)
+        in_tensor_dict["smpl_faces"] = (torch.tensor(
+            smpl_model.faces.astype(np.int)).long().unsqueeze(0).to(
+                self.device))
+
+        # The optimizer and variables
+        optimed_pose = torch.tensor(batch["body_pose"][0],
+                                    device=self.device,
+                                    requires_grad=True)  # [1,23,3,3]
+        optimed_trans = torch.tensor(batch["transl"][0],
+                                     device=self.device,
+                                     requires_grad=True)  # [3]
+        optimed_betas = torch.tensor(batch["betas"][0],
+                                     device=self.device,
+                                     requires_grad=True)  # [1,10]
+        optimed_orient = torch.tensor(batch["global_orient"][0],
+                                      device=self.device,
+                                      requires_grad=True)  # [1,1,3,3]
+
+        optimizer_smpl = torch.optim.SGD(
+            [optimed_pose, optimed_trans, optimed_betas, optimed_orient],
+            lr=1e-3,
+            momentum=0.9,
+        )
+        scheduler_smpl = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer_smpl,
+            mode="min",
+            factor=0.5,
+            verbose=0,
+            min_lr=1e-5,
+            patience=5)
+        loop_smpl = range(50)
+        for i in loop_smpl:
+
+            optimizer_smpl.zero_grad()
+
+            # prior_loss, optimed_pose = dataset.vposer_prior(optimed_pose)
+            smpl_out = smpl_model(
+                betas=optimed_betas,
+                body_pose=optimed_pose,
+                global_orient=optimed_orient,
+                transl=optimed_trans,
+                return_verts=True,
+            )
+
+            smpl_verts = smpl_out.vertices[0] * 100.0
+            smpl_verts = projection(smpl_verts,
+                                    batch["calib"][0],
+                                    format="tensor")
+            smpl_verts[:, 1] *= -1
+            # render optimized mesh (normal, T_normal, image [-1,1])
+            self.render.load_meshes(smpl_verts, in_tensor_dict["smpl_faces"])
+            (
+                in_tensor_dict["T_normal_F"],
+                in_tensor_dict["T_normal_B"],
+            ) = self.render.get_rgb_image()
+
+            T_mask_F, T_mask_B = self.render.get_silhouette_image()
+
+            with torch.no_grad():
+                (
+                    in_tensor_dict["normal_F"],
+                    in_tensor_dict["normal_B"],
+                ) = self.netG.normal_filter(in_tensor_dict)
+
+            # mask = torch.abs(in_tensor['T_normal_F']).sum(dim=0, keepdims=True) > 0.0
+            diff_F_smpl = torch.abs(in_tensor_dict["T_normal_F"] -
+                                    in_tensor_dict["normal_F"])
+            diff_B_smpl = torch.abs(in_tensor_dict["T_normal_B"] -
+                                    in_tensor_dict["normal_B"])
+            loss = (diff_F_smpl + diff_B_smpl).mean()
+
+            # silhouette loss
+            smpl_arr = torch.cat([T_mask_F, T_mask_B], dim=-1)[0]
+            gt_arr = torch.cat(
+                [in_tensor_dict["normal_F"][0], in_tensor_dict["normal_B"][0]],
+                dim=2).permute(1, 2, 0)
+            gt_arr = ((gt_arr + 1.0) * 0.5).to(self.device)
+            bg_color = (torch.Tensor(
+                [0.5, 0.5, 0.5]).unsqueeze(0).unsqueeze(0).to(self.device))
+            gt_arr = ((gt_arr - bg_color).sum(dim=-1) != 0.0).float()
+            loss += torch.abs(smpl_arr - gt_arr).mean()
+
+            # Image.fromarray(((in_tensor_dict['T_normal_F'][0].permute(1,2,0)+1.0)*0.5*255.0).detach().cpu().numpy().astype(np.uint8)).show()
+
+            # loop_smpl.set_description(f"smpl = {loss:.3f}")
+
+            loss.backward(retain_graph=True)
+            optimizer_smpl.step()
+            scheduler_smpl.step(loss)
+            in_tensor_dict["smpl_verts"] = smpl_verts.unsqueeze(0)
+
+        in_tensor_dict.update(
+            self.compute_vis_cmap(
+                batch["type"][0],
+                in_tensor_dict["smpl_verts"][0],
+                in_tensor_dict["smpl_faces"][0],
+            ))
+
+        features, inter = self.netG.filter(in_tensor_dict, return_inter=True)
+
+        return features, inter, in_tensor_dict
+
+    @torch.enable_grad()
+    def optim_cloth(self, verts_pr, faces_pr, inter):
+
+        # convert from GT to SDF
+        verts_pr -= (self.resolutions[-1] - 1) / 2.0
+        verts_pr /= (self.resolutions[-1] - 1) / 2.0
+
+        losses = {
+            "cloth": {
+                "weight": 5.0,
+                "value": 0.0
+            },
+            "edge": {
+                "weight": 100.0,
+                "value": 0.0
+            },
+            "normal": {
+                "weight": 0.2,
+                "value": 0.0
+            },
+            "laplacian": {
+                "weight": 100.0,
+                "value": 0.0
+            },
+            "smpl": {
+                "weight": 1.0,
+                "value": 0.0
+            },
+            "deform": {
+                "weight": 20.0,
+                "value": 0.0
+            },
+        }
+
+        deform_verts = torch.full(verts_pr.shape,
+                                  0.0,
+                                  device=self.device,
+                                  requires_grad=True)
+        optimizer_cloth = torch.optim.SGD([deform_verts],
+                                          lr=1e-1,
+                                          momentum=0.9)
+        scheduler_cloth = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer_cloth,
+            mode="min",
+            factor=0.1,
+            verbose=0,
+            min_lr=1e-3,
+            patience=5)
+        # cloth optimization
+        loop_cloth = range(100)
+
+        for i in loop_cloth:
+
+            optimizer_cloth.zero_grad()
+
+            self.render.load_meshes(
+                verts_pr.unsqueeze(0).to(self.device),
+                faces_pr.unsqueeze(0).to(self.device).long(),
+                deform_verts,
+            )
+            P_normal_F, P_normal_B = self.render.get_rgb_image()
+
+            update_mesh_shape_prior_losses(self.render.mesh, losses)
+            diff_F_cloth = torch.abs(P_normal_F[0] - inter[:3])
+            diff_B_cloth = torch.abs(P_normal_B[0] - inter[3:])
+            losses["cloth"]["value"] = (diff_F_cloth + diff_B_cloth).mean()
+            losses["deform"]["value"] = torch.topk(
+                torch.abs(deform_verts.flatten()), 30)[0].mean()
+
+            # Weighted sum of the losses
+            cloth_loss = torch.tensor(0.0, device=self.device)
+            pbar_desc = ""
+
+            for k in losses.keys():
+                if k != "smpl":
+                    cloth_loss_per_cls = losses[k]["value"] * \
+                        losses[k]["weight"]
+                    pbar_desc += f"{k}: {cloth_loss_per_cls:.3f} | "
+                    cloth_loss += cloth_loss_per_cls
+
+            # loop_cloth.set_description(pbar_desc)
+            cloth_loss.backward(retain_graph=True)
+            optimizer_cloth.step()
+            scheduler_cloth.step(cloth_loss)
+
+        # convert from GT to SDF
+        deform_verts = deform_verts.flatten().detach()
+        deform_verts[torch.topk(torch.abs(deform_verts),
+                                30)[1]] = deform_verts.mean()
+        deform_verts = deform_verts.view(-1, 3).cpu()
+
+        verts_pr += deform_verts
+        verts_pr *= (self.resolutions[-1] - 1) / 2.0
+        verts_pr += (self.resolutions[-1] - 1) / 2.0
+
+        return verts_pr
+
+    def test_step(self, batch, batch_idx):
+
+        self.netG.eval()
+        self.netG.training = False
+        in_tensor_dict = {}
+
+        # export paths
+        mesh_name = batch["subject"][0]
+        mesh_rot = batch["rotation"][0].item()
+
+        self.export_dir = osp.join(self.cfg.results_path, self.cfg.name,
+                                   "-".join(self.cfg.dataset.types), mesh_name)
+
+        os.makedirs(self.export_dir, exist_ok=True)
+
+        for name in self.in_total:
+            if name in batch.keys():
+                in_tensor_dict.update({name: batch[name]})
+
+        in_tensor_dict.update({
+            k: batch[k] if k in batch.keys() else None
+            for k in getattr(self, f"{self.prior_type}_keys")
+        })
+
+        if "T_normal_F" not in in_tensor_dict.keys(
+        ) or "T_normal_B" not in in_tensor_dict.keys():
+
+            # update the new T_normal_F/B
+            self.render.load_meshes(
+                batch["smpl_verts"] *
+                torch.tensor([1.0, -1.0, 1.0]).to(self.device),
+                batch["smpl_faces"])
+            T_normal_F, T_normal_B = self.render.get_rgb_image()
+            in_tensor_dict.update({
+                'T_normal_F': T_normal_F,
+                'T_normal_B': T_normal_B
+            })
+
+        with torch.no_grad():
+            features, inter = self.netG.filter(in_tensor_dict,
+                                               return_inter=True)
+            sdf = self.reconEngine(opt=self.cfg,
+                                   netG=self.netG,
+                                   features=features,
+                                   proj_matrix=None)
+
+        def tensor2arr(x):
+            return (x[0].permute(1, 2, 0).detach().cpu().numpy() +
+                    1.0) * 0.5 * 255.0
+
+        # save inter results
+        image = tensor2arr(in_tensor_dict["image"])
+        smpl_F = tensor2arr(in_tensor_dict["T_normal_F"])
+        smpl_B = tensor2arr(in_tensor_dict["T_normal_B"])
+        image_inter = np.concatenate(self.tensor2image(512, inter[0]) +
+                                     [smpl_F, smpl_B, image],
+                                     axis=1)
+        Image.fromarray((image_inter).astype(np.uint8)).save(
+            osp.join(self.export_dir, f"{mesh_rot}_inter.png"))
+
+        verts_pr, faces_pr = self.reconEngine.export_mesh(sdf)
+
+        if self.clean_mesh_flag:
+            verts_pr, faces_pr = clean_mesh(verts_pr, faces_pr)
+
+        verts_gt = batch["verts"][0]
+        faces_gt = batch["faces"][0]
+
+        self.result_eval.update({
+            "verts_gt": verts_gt,
+            "faces_gt": faces_gt,
+            "verts_pr": verts_pr,
+            "faces_pr": faces_pr,
+            "recon_size": (self.resolutions[-1] - 1.0),
+            "calib": batch["calib"][0],
+        })
+
+        self.evaluator.set_mesh(self.result_eval)
+        chamfer, p2s = self.evaluator.calculate_chamfer_p2s(num_samples=1000)
+        normal_consist = self.evaluator.calculate_normal_consist(
+            osp.join(self.export_dir, f"{mesh_rot}_nc.png"))
+
+        test_log = {"chamfer": chamfer, "p2s": p2s, "NC": normal_consist}
+
+        return test_log
+
+    def test_epoch_end(self, outputs):
+
+        # make_test_gif("/".join(self.export_dir.split("/")[:-2]))
+
+        accu_outputs = accumulate(
+            outputs,
+            rot_num=3,
+            split={
+                "cape-easy": (0, 50),
+                "cape-hard": (50, 100)
+            },
+        )
+
+        print(colored(self.cfg.name, "green"))
+        print(colored(self.cfg.dataset.noise_scale, "green"))
+
+        self.logger.experiment.add_hparams(
+            hparam_dict={
+                "lr_G": self.lr_G,
+                "bsize": self.batch_size
+            },
+            metric_dict=accu_outputs,
+        )
+
+        np.save(
+            osp.join(self.export_dir, "../test_results.npy"),
+            accu_outputs,
+            allow_pickle=True,
+        )
+
+        return accu_outputs
+
+    def tensor2image(self, height, inter):
+
+        all = []
+        for dim in self.in_geo_dim:
+            img = resize(
+                np.tile(
+                    ((inter[:dim].cpu().numpy() + 1.0) / 2.0 *
+                     255.0).transpose(1, 2, 0),
+                    (1, 1, int(3 / dim)),
+                ),
+                (height, height),
+                anti_aliasing=True,
+            )
+
+            all.append(img)
+            inter = inter[dim:]
+
+        return all
+
+    def render_func(self, in_tensor_dict, dataset="title", idx=0):
+
+        for name in in_tensor_dict.keys():
+            if in_tensor_dict[name] is not None:
+                in_tensor_dict[name] = in_tensor_dict[name][0:1]
+
+        self.netG.eval()
+        features, inter = self.netG.filter(in_tensor_dict, return_inter=True)
+        sdf = self.reconEngine(opt=self.cfg,
+                               netG=self.netG,
+                               features=features,
+                               proj_matrix=None)
+
+        if sdf is not None:
+            render = self.reconEngine.display(sdf)
+
+            image_pred = np.flip(render[:, :, ::-1], axis=0)
+            height = image_pred.shape[0]
+
+            image_gt = resize(
+                ((in_tensor_dict["image"].cpu().numpy()[0] + 1.0) / 2.0 *
+                 255.0).transpose(1, 2, 0),
+                (height, height),
+                anti_aliasing=True,
+            )
+            image_inter = self.tensor2image(height, inter[0])
+            image = np.concatenate([image_pred, image_gt] + image_inter,
+                                   axis=1)
+
+            step_id = self.global_step if dataset == "train" else self.global_step + idx
+            self.logger.experiment.add_image(
+                tag=f"Occupancy-{dataset}/{step_id}",
+                img_tensor=image.transpose(2, 0, 1),
+                global_step=step_id,
+            )
+
+    def test_single(self, batch):
+
+        self.netG.eval()
+        self.netG.training = False
+        in_tensor_dict = {}
+
+        for name in self.in_total:
+            if name in batch.keys():
+                in_tensor_dict.update({name: batch[name]})
+
+        in_tensor_dict.update({
+            k: batch[k] if k in batch.keys() else None
+            for k in getattr(self, f"{self.prior_type}_keys")
+        })
+
+        with torch.no_grad():
+            features, inter = self.netG.filter(in_tensor_dict,
+                                               return_inter=True)
+            sdf = self.reconEngine(opt=self.cfg,
+                                   netG=self.netG,
+                                   features=features,
+                                   proj_matrix=None)
+
+        verts_pr, faces_pr = self.reconEngine.export_mesh(sdf)
+
+        if self.clean_mesh_flag:
+            verts_pr, faces_pr = clean_mesh(verts_pr, faces_pr)
+
+        verts_pr -= (self.resolutions[-1] - 1) / 2.0
+        verts_pr /= (self.resolutions[-1] - 1) / 2.0
+
+        return verts_pr, faces_pr, inter
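As a worked check of the coarse-to-fine resolution schedule built in `ICON.__init__` above, here is a standalone snippet (assuming `mcube_res = 256`, the value merged into the config by apps/infer.py below):

```python
import numpy as np

mcube_res = 256  # pushed into cfg via cfg_show_list in apps/infer.py
resolutions = (np.logspace(start=5,
                           stop=np.log2(mcube_res),
                           base=2,
                           num=int(np.log2(mcube_res) - 4),
                           endpoint=True) + 1.0)
print(resolutions.astype(np.int16).tolist())  # [33, 65, 129, 257]
# These are the grid sizes handed to Seg3dLossless as its coarse-to-fine
# resolutions for extracting the implicit surface.
```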
apps/Normal.py
ADDED
@@ -0,0 +1,220 @@
+from lib.net import NormalNet
+from lib.common.train_util import *
+import logging
+import torch
+import numpy as np
+from torch import nn
+from skimage.transform import resize
+import pytorch_lightning as pl
+
+torch.backends.cudnn.benchmark = True
+
+logging.getLogger("lightning").setLevel(logging.ERROR)
+import warnings
+
+warnings.filterwarnings("ignore")
+
+
+class Normal(pl.LightningModule):
+
+    def __init__(self, cfg):
+        super(Normal, self).__init__()
+        self.cfg = cfg
+        self.batch_size = self.cfg.batch_size
+        self.lr_N = self.cfg.lr_N
+
+        self.schedulers = []
+
+        self.netG = NormalNet(self.cfg, error_term=nn.SmoothL1Loss())
+
+        self.in_nml = [item[0] for item in cfg.net.in_nml]
+
+    def get_progress_bar_dict(self):
+        tqdm_dict = super().get_progress_bar_dict()
+        if "v_num" in tqdm_dict:
+            del tqdm_dict["v_num"]
+        return tqdm_dict
+
+    # Training related
+    def configure_optimizers(self):
+
+        # set optimizer
+        weight_decay = self.cfg.weight_decay
+        momentum = self.cfg.momentum
+
+        optim_params_N_F = [{
+            "params": self.netG.netF.parameters(),
+            "lr": self.lr_N
+        }]
+        optim_params_N_B = [{
+            "params": self.netG.netB.parameters(),
+            "lr": self.lr_N
+        }]
+
+        optimizer_N_F = torch.optim.Adam(optim_params_N_F,
+                                         lr=self.lr_N,
+                                         weight_decay=weight_decay)
+
+        optimizer_N_B = torch.optim.Adam(optim_params_N_B,
+                                         lr=self.lr_N,
+                                         weight_decay=weight_decay)
+
+        scheduler_N_F = torch.optim.lr_scheduler.MultiStepLR(
+            optimizer_N_F, milestones=self.cfg.schedule, gamma=self.cfg.gamma)
+
+        scheduler_N_B = torch.optim.lr_scheduler.MultiStepLR(
+            optimizer_N_B, milestones=self.cfg.schedule, gamma=self.cfg.gamma)
+
+        self.schedulers = [scheduler_N_F, scheduler_N_B]
+        optims = [optimizer_N_F, optimizer_N_B]
+
+        return optims, self.schedulers
+
+    def render_func(self, render_tensor):
+
+        height = render_tensor["image"].shape[2]
+        result_list = []
+
+        for name in render_tensor.keys():
+            result_list.append(
+                resize(
+                    ((render_tensor[name].cpu().numpy()[0] + 1.0) /
+                     2.0).transpose(1, 2, 0),
+                    (height, height),
+                    anti_aliasing=True,
+                ))
+        result_array = np.concatenate(result_list, axis=1)
+
+        return result_array
+
+    def training_step(self, batch, batch_idx, optimizer_idx):
+
+        export_cfg(self.logger, self.cfg)
+
+        # retrieve the data
+        in_tensor = {}
+        for name in self.in_nml:
+            in_tensor[name] = batch[name]
+
+        FB_tensor = {
+            "normal_F": batch["normal_F"],
+            "normal_B": batch["normal_B"]
+        }
+
+        self.netG.train()
+
+        preds_F, preds_B = self.netG(in_tensor)
+        error_NF, error_NB = self.netG.get_norm_error(preds_F, preds_B,
+                                                      FB_tensor)
+
+        (opt_nf, opt_nb) = self.optimizers()
+
+        opt_nf.zero_grad()
+        opt_nb.zero_grad()
+
+        self.manual_backward(error_NF, opt_nf)
+        self.manual_backward(error_NB, opt_nb)
+
+        opt_nf.step()
+        opt_nb.step()
+
+        if batch_idx > 0 and batch_idx % int(self.cfg.freq_show_train) == 0:
+
+            self.netG.eval()
+            with torch.no_grad():
+                nmlF, nmlB = self.netG(in_tensor)
+                in_tensor.update({"nmlF": nmlF, "nmlB": nmlB})
+                result_array = self.render_func(in_tensor)
+
+                self.logger.experiment.add_image(
+                    tag=f"Normal-train/{self.global_step}",
+                    img_tensor=result_array.transpose(2, 0, 1),
+                    global_step=self.global_step,
+                )
+
+        # metrics processing
+        metrics_log = {
+            "train_loss-NF": error_NF.item(),
+            "train_loss-NB": error_NB.item(),
+        }
+
+        tf_log = tf_log_convert(metrics_log)
+        bar_log = bar_log_convert(metrics_log)
+
+        return {
+            "loss": error_NF + error_NB,
+            "loss-NF": error_NF,
+            "loss-NB": error_NB,
+            "log": tf_log,
+            "progress_bar": bar_log,
+        }
+
+    def training_epoch_end(self, outputs):
+
+        if [] in outputs:
+            outputs = outputs[0]
+
+        # metrics processing
+        metrics_log = {
+            "train_avgloss": batch_mean(outputs, "loss"),
+            "train_avgloss-NF": batch_mean(outputs, "loss-NF"),
+            "train_avgloss-NB": batch_mean(outputs, "loss-NB"),
+        }
+
+        tf_log = tf_log_convert(metrics_log)
+
+        tf_log["lr-NF"] = self.schedulers[0].get_last_lr()[0]
+        tf_log["lr-NB"] = self.schedulers[1].get_last_lr()[0]
+
+        return {"log": tf_log}
+
+    def validation_step(self, batch, batch_idx):
+
+        # retrieve the data
+        in_tensor = {}
+        for name in self.in_nml:
+            in_tensor[name] = batch[name]
+
+        FB_tensor = {
+            "normal_F": batch["normal_F"],
+            "normal_B": batch["normal_B"]
+        }
+
+        self.netG.train()
+
+        preds_F, preds_B = self.netG(in_tensor)
+        error_NF, error_NB = self.netG.get_norm_error(preds_F, preds_B,
+                                                      FB_tensor)
+
+        if (batch_idx > 0 and batch_idx % int(self.cfg.freq_show_train)
+                == 0) or (batch_idx == 0):
+
+            with torch.no_grad():
+                nmlF, nmlB = self.netG(in_tensor)
+                in_tensor.update({"nmlF": nmlF, "nmlB": nmlB})
+                result_array = self.render_func(in_tensor)
+
+                self.logger.experiment.add_image(
+                    tag=f"Normal-val/{self.global_step}",
+                    img_tensor=result_array.transpose(2, 0, 1),
+                    global_step=self.global_step,
+                )
+
+        return {
+            "val_loss": error_NF + error_NB,
+            "val_loss-NF": error_NF,
+            "val_loss-NB": error_NB,
+        }
+
+    def validation_epoch_end(self, outputs):
+
+        # metrics processing
+        metrics_log = {
+            "val_avgloss": batch_mean(outputs, "val_loss"),
+            "val_avgloss-NF": batch_mean(outputs, "val_loss-NF"),
+            "val_avgloss-NB": batch_mean(outputs, "val_loss-NB"),
+        }
+
+        tf_log = tf_log_convert(metrics_log)
+
+        return {"log": tf_log}
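The training step above updates the front and back normal networks with two separate optimizers driven by hand (Lightning's manual optimization). Below is a minimal self-contained sketch of the same pattern, using toy stand-in modules rather than the repo's NormalNet:

```python
# Toy illustration only: nn.Linear layers stand in for the F/B normal heads.
import torch
from torch import nn

netF, netB = nn.Linear(8, 8), nn.Linear(8, 8)
opt_nf = torch.optim.Adam(netF.parameters(), lr=1e-4)
opt_nb = torch.optim.Adam(netB.parameters(), lr=1e-4)

x = torch.randn(4, 8)
target_F, target_B = torch.randn(4, 8), torch.randn(4, 8)

error_NF = nn.functional.smooth_l1_loss(netF(x), target_F)
error_NB = nn.functional.smooth_l1_loss(netB(x), target_B)

opt_nf.zero_grad()
opt_nb.zero_grad()
error_NF.backward()  # self.manual_backward(...) reduces to .backward() in plain PyTorch
error_NB.backward()
opt_nf.step()
opt_nb.step()
```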
apps/infer.py
ADDED
@@ -0,0 +1,492 @@
+# -*- coding: utf-8 -*-
+
+# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
+# holder of all proprietary rights on this computer program.
+# You can only use this computer program if you have closed
+# a license agreement with MPG or you get the right to use the computer
+# program from someone who is authorized to grant you that right.
+# Any use of the computer program without a valid license is prohibited and
+# liable to prosecution.
+#
+# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
+# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
+# for Intelligent Systems. All rights reserved.
+#
+# Contact: [email protected]
+
+import os
+import gc
+
+import logging
+from lib.common.config import cfg
+from lib.dataset.mesh_util import (
+    load_checkpoint,
+    update_mesh_shape_prior_losses,
+    blend_rgb_norm,
+    unwrap,
+    remesh,
+    tensor2variable,
+    rot6d_to_rotmat
+)
+
+from lib.dataset.TestDataset import TestDataset
+from lib.common.render import query_color
+from lib.net.local_affine import LocalAffine
+from pytorch3d.structures import Meshes
+from apps.ICON import ICON
+
+from termcolor import colored
+import numpy as np
+from PIL import Image
+import trimesh
+import numpy as np
+from tqdm import tqdm
+
+import torch
+torch.backends.cudnn.benchmark = True
+
+logging.getLogger("trimesh").setLevel(logging.ERROR)
+
+
+def generate_model(in_path, model_type):
+
+    torch.cuda.empty_cache()
+
+    if model_type == 'ICON':
+        model_type = 'icon-filter'
+    else:
+        model_type = model_type.lower()
+
+    config_dict = {'loop_smpl': 100,
+                   'loop_cloth': 200,
+                   'patience': 5,
+                   'out_dir': './results',
+                   'hps_type': 'pymaf',
+                   'config': f"./configs/{model_type}.yaml"}
+
+    # cfg read and merge
+    cfg.merge_from_file(config_dict['config'])
+    cfg.merge_from_file("./lib/pymaf/configs/pymaf_config.yaml")
+
+    os.makedirs(config_dict['out_dir'], exist_ok=True)
+
+    cfg_show_list = [
+        "test_gpus",
+        [0],
+        "mcube_res",
+        256,
+        "clean_mesh",
+        True,
+    ]
+
+    cfg.merge_from_list(cfg_show_list)
+    cfg.freeze()
+
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+    device = torch.device(f"cuda:0")
+
+    # load model and dataloader
+    model = ICON(cfg)
+    model = load_checkpoint(model, cfg)
+
+    dataset_param = {
+        'image_path': in_path,
+        'seg_dir': None,
+        'has_det': True,  # w/ or w/o detection
+        'hps_type': 'pymaf'  # pymaf/pare/pixie
+    }
+
+    if config_dict['hps_type'] == "pixie" and "pamir" in config_dict['config']:
+        print(colored("PIXIE isn't compatible with PaMIR, thus switch to PyMAF", "red"))
+        dataset_param["hps_type"] = "pymaf"
+
+    dataset = TestDataset(dataset_param, device)
+
+    print(colored(f"Dataset Size: {len(dataset)}", "green"))
+
+    pbar = tqdm(dataset)
+
+    for data in pbar:
+
+        pbar.set_description(f"{data['name']}")
+
+        in_tensor = {"smpl_faces": data["smpl_faces"], "image": data["image"]}
+
+        # The optimizer and variables
+        optimed_pose = torch.tensor(
+            data["body_pose"], device=device, requires_grad=True
+        )  # [1,23,3,3]
+        optimed_trans = torch.tensor(
+            data["trans"], device=device, requires_grad=True
+        )  # [3]
+        optimed_betas = torch.tensor(
+            data["betas"], device=device, requires_grad=True
+        )  # [1,10]
+        optimed_orient = torch.tensor(
+            data["global_orient"], device=device, requires_grad=True
+        )  # [1,1,3,3]
+
+        optimizer_smpl = torch.optim.Adam(
+            [optimed_pose, optimed_trans, optimed_betas, optimed_orient],
+            lr=1e-3,
+            amsgrad=True,
+        )
+        scheduler_smpl = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer_smpl,
+            mode="min",
+            factor=0.5,
+            verbose=0,
+            min_lr=1e-5,
+            patience=config_dict['patience'],
+        )
+
+        losses = {
+            # Cloth: Normal_recon - Normal_pred
+            "cloth": {"weight": 1e1, "value": 0.0},
+            # Cloth: [RT]_v1 - [RT]_v2 (v1-edge-v2)
+            "stiffness": {"weight": 1e5, "value": 0.0},
+            # Cloth: det(R) = 1
+            "rigid": {"weight": 1e5, "value": 0.0},
+            # Cloth: edge length
+            "edge": {"weight": 0, "value": 0.0},
+            # Cloth: normal consistency
+            "nc": {"weight": 0, "value": 0.0},
+            # Cloth: laplacian smoothing
+            "laplacian": {"weight": 1e2, "value": 0.0},
+            # Body: Normal_pred - Normal_smpl
+            "normal": {"weight": 1e0, "value": 0.0},
+            # Body: Silhouette_pred - Silhouette_smpl
+            "silhouette": {"weight": 1e0, "value": 0.0},
+        }
+
+        # smpl optimization
+
+        loop_smpl = tqdm(range(config_dict['loop_smpl']))
+
+        for _ in loop_smpl:
+
+            optimizer_smpl.zero_grad()
+
+            # 6d_rot to rot_mat
+            optimed_orient_mat = rot6d_to_rotmat(optimed_orient.view(-1, 6)).unsqueeze(0)
+            optimed_pose_mat = rot6d_to_rotmat(optimed_pose.view(-1, 6)).unsqueeze(0)
+
+            if dataset_param["hps_type"] != "pixie":
+                smpl_out = dataset.smpl_model(
+                    betas=optimed_betas,
+                    body_pose=optimed_pose_mat,
+                    global_orient=optimed_orient_mat,
+                    pose2rot=False,
+                )
+
+                smpl_verts = ((smpl_out.vertices) +
+                              optimed_trans) * data["scale"]
+            else:
+                smpl_verts, _, _ = dataset.smpl_model(
+                    shape_params=optimed_betas,
+                    expression_params=tensor2variable(data["exp"], device),
+                    body_pose=optimed_pose_mat,
+                    global_pose=optimed_orient_mat,
+                    jaw_pose=tensor2variable(data["jaw_pose"], device),
+                    left_hand_pose=tensor2variable(
+                        data["left_hand_pose"], device),
+                    right_hand_pose=tensor2variable(
+                        data["right_hand_pose"], device),
+                )
+
+                smpl_verts = (smpl_verts + optimed_trans) * data["scale"]
+
+            # render optimized mesh (normal, T_normal, image [-1,1])
+            in_tensor["T_normal_F"], in_tensor["T_normal_B"] = dataset.render_normal(
+                smpl_verts *
+                torch.tensor([1.0, -1.0, -1.0]
+                             ).to(device), in_tensor["smpl_faces"]
+            )
+            T_mask_F, T_mask_B = dataset.render.get_silhouette_image()
+
+            with torch.no_grad():
+                in_tensor["normal_F"], in_tensor["normal_B"] = model.netG.normal_filter(
+                    in_tensor
+                )
+
+            diff_F_smpl = torch.abs(
+                in_tensor["T_normal_F"] - in_tensor["normal_F"])
+            diff_B_smpl = torch.abs(
+                in_tensor["T_normal_B"] - in_tensor["normal_B"])
+
+            losses["normal"]["value"] = (diff_F_smpl + diff_B_smpl).mean()
+
+            # silhouette loss
+            smpl_arr = torch.cat([T_mask_F, T_mask_B], dim=-1)[0]
+            gt_arr = torch.cat(
+                [in_tensor["normal_F"][0], in_tensor["normal_B"][0]], dim=2
+            ).permute(1, 2, 0)
+            gt_arr = ((gt_arr + 1.0) * 0.5).to(device)
+            bg_color = (
+                torch.Tensor([0.5, 0.5, 0.5]).unsqueeze(
+                    0).unsqueeze(0).to(device)
+            )
+            gt_arr = ((gt_arr - bg_color).sum(dim=-1) != 0.0).float()
+            diff_S = torch.abs(smpl_arr - gt_arr)
+            losses["silhouette"]["value"] = diff_S.mean()
+
+            # Weighted sum of the losses
+            smpl_loss = 0.0
+            pbar_desc = "Body Fitting --- "
+            for k in ["normal", "silhouette"]:
+                pbar_desc += f"{k}: {losses[k]['value'] * losses[k]['weight']:.3f} | "
+                smpl_loss += losses[k]["value"] * losses[k]["weight"]
+            pbar_desc += f"Total: {smpl_loss:.3f}"
+            loop_smpl.set_description(pbar_desc)
+
+            smpl_loss.backward()
+            optimizer_smpl.step()
+            scheduler_smpl.step(smpl_loss)
+            in_tensor["smpl_verts"] = smpl_verts * \
+                torch.tensor([1.0, 1.0, -1.0]).to(device)
+
+        # visualize the optimization process
+        # 1. SMPL Fitting
+        # 2. Clothes Refinement
+
+        os.makedirs(os.path.join(config_dict['out_dir'], cfg.name,
+                                 "refinement"), exist_ok=True)
+
+        # visualize the final results in self-rotation mode
+        os.makedirs(os.path.join(config_dict['out_dir'],
|
257 |
+
cfg.name, "vid"), exist_ok=True)
|
258 |
+
|
259 |
+
# final results rendered as image
|
260 |
+
# 1. Render the final fitted SMPL (xxx_smpl.png)
|
261 |
+
# 2. Render the final reconstructed clothed human (xxx_cloth.png)
|
262 |
+
# 3. Blend the original image with predicted cloth normal (xxx_overlap.png)
|
263 |
+
|
264 |
+
os.makedirs(os.path.join(config_dict['out_dir'],
|
265 |
+
cfg.name, "png"), exist_ok=True)
|
266 |
+
|
267 |
+
# final reconstruction meshes
|
268 |
+
# 1. SMPL mesh (xxx_smpl.obj)
|
269 |
+
# 2. SMPL params (xxx_smpl.npy)
|
270 |
+
# 3. clothed mesh (xxx_recon.obj)
|
271 |
+
# 4. remeshed clothed mesh (xxx_remesh.obj)
|
272 |
+
# 5. refined clothed mesh (xxx_refine.obj)
|
273 |
+
|
274 |
+
os.makedirs(os.path.join(config_dict['out_dir'],
|
275 |
+
cfg.name, "obj"), exist_ok=True)
|
276 |
+
|
277 |
+
norm_pred_F = (
|
278 |
+
((in_tensor["normal_F"][0].permute(1, 2, 0) + 1.0) * 255.0 / 2.0)
|
279 |
+
.detach()
|
280 |
+
.cpu()
|
281 |
+
.numpy()
|
282 |
+
.astype(np.uint8)
|
283 |
+
)
|
284 |
+
|
285 |
+
norm_pred_B = (
|
286 |
+
((in_tensor["normal_B"][0].permute(1, 2, 0) + 1.0) * 255.0 / 2.0)
|
287 |
+
.detach()
|
288 |
+
.cpu()
|
289 |
+
.numpy()
|
290 |
+
.astype(np.uint8)
|
291 |
+
)
|
292 |
+
|
293 |
+
norm_orig_F = unwrap(norm_pred_F, data)
|
294 |
+
norm_orig_B = unwrap(norm_pred_B, data)
|
295 |
+
|
296 |
+
mask_orig = unwrap(
|
297 |
+
np.repeat(
|
298 |
+
data["mask"].permute(1, 2, 0).detach().cpu().numpy(), 3, axis=2
|
299 |
+
).astype(np.uint8),
|
300 |
+
data,
|
301 |
+
)
|
302 |
+
rgb_norm_F = blend_rgb_norm(data["ori_image"], norm_orig_F, mask_orig)
|
303 |
+
rgb_norm_B = blend_rgb_norm(data["ori_image"], norm_orig_B, mask_orig)
|
304 |
+
|
305 |
+
Image.fromarray(
|
306 |
+
np.concatenate(
|
307 |
+
[data["ori_image"].astype(np.uint8), rgb_norm_F, rgb_norm_B], axis=1)
|
308 |
+
).save(os.path.join(config_dict['out_dir'], cfg.name, f"png/{data['name']}_overlap.png"))
|
309 |
+
|
310 |
+
smpl_obj = trimesh.Trimesh(
|
311 |
+
in_tensor["smpl_verts"].detach().cpu()[0] *
|
312 |
+
torch.tensor([1.0, -1.0, 1.0]),
|
313 |
+
in_tensor['smpl_faces'].detach().cpu()[0],
|
314 |
+
process=False,
|
315 |
+
maintains_order=True
|
316 |
+
)
|
317 |
+
smpl_obj.visual.vertex_colors = (smpl_obj.vertex_normals+1.0)*255.0*0.5
|
318 |
+
smpl_obj.export(
|
319 |
+
f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_smpl.obj")
|
320 |
+
smpl_obj.export(
|
321 |
+
f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_smpl.glb")
|
322 |
+
|
323 |
+
smpl_info = {'betas': optimed_betas,
|
324 |
+
'pose': optimed_pose_mat,
|
325 |
+
'orient': optimed_orient_mat,
|
326 |
+
'trans': optimed_trans}
|
327 |
+
|
328 |
+
np.save(
|
329 |
+
f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_smpl.npy", smpl_info, allow_pickle=True)
|
330 |
+
|
331 |
+
# ------------------------------------------------------------------------------------------------------------------
|
332 |
+
|
333 |
+
# cloth optimization
|
334 |
+
|
335 |
+
# cloth recon
|
336 |
+
in_tensor.update(
|
337 |
+
dataset.compute_vis_cmap(
|
338 |
+
in_tensor["smpl_verts"][0], in_tensor["smpl_faces"][0]
|
339 |
+
)
|
340 |
+
)
|
341 |
+
|
342 |
+
if cfg.net.prior_type == "pamir":
|
343 |
+
in_tensor.update(
|
344 |
+
dataset.compute_voxel_verts(
|
345 |
+
optimed_pose,
|
346 |
+
optimed_orient,
|
347 |
+
optimed_betas,
|
348 |
+
optimed_trans,
|
349 |
+
data["scale"],
|
350 |
+
)
|
351 |
+
)
|
352 |
+
|
353 |
+
with torch.no_grad():
|
354 |
+
verts_pr, faces_pr, _ = model.test_single(in_tensor)
|
355 |
+
|
356 |
+
recon_obj = trimesh.Trimesh(
|
357 |
+
verts_pr, faces_pr, process=False, maintains_order=True
|
358 |
+
)
|
359 |
+
recon_obj.visual.vertex_colors = (
|
360 |
+
recon_obj.vertex_normals+1.0)*255.0*0.5
|
361 |
+
recon_obj.export(
|
362 |
+
os.path.join(config_dict['out_dir'], cfg.name,
|
363 |
+
f"obj/{data['name']}_recon.obj")
|
364 |
+
)
|
365 |
+
|
366 |
+
# Isotropic Explicit Remeshing for better geometry topology
|
367 |
+
verts_refine, faces_refine = remesh(os.path.join(config_dict['out_dir'], cfg.name,
|
368 |
+
f"obj/{data['name']}_recon.obj"), 0.5, device)
|
369 |
+
|
370 |
+
# define local_affine deform verts
|
371 |
+
mesh_pr = Meshes(verts_refine, faces_refine).to(device)
|
372 |
+
local_affine_model = LocalAffine(
|
373 |
+
mesh_pr.verts_padded().shape[1], mesh_pr.verts_padded().shape[0], mesh_pr.edges_packed()).to(device)
|
374 |
+
optimizer_cloth = torch.optim.Adam(
|
375 |
+
[{'params': local_affine_model.parameters()}], lr=1e-4, amsgrad=True)
|
376 |
+
|
377 |
+
scheduler_cloth = torch.optim.lr_scheduler.ReduceLROnPlateau(
|
378 |
+
optimizer_cloth,
|
379 |
+
mode="min",
|
380 |
+
factor=0.1,
|
381 |
+
verbose=0,
|
382 |
+
min_lr=1e-5,
|
383 |
+
patience=config_dict['patience'],
|
384 |
+
)
|
385 |
+
|
386 |
+
final = None
|
387 |
+
|
388 |
+
if config_dict['loop_cloth'] > 0:
|
389 |
+
|
390 |
+
loop_cloth = tqdm(range(config_dict['loop_cloth']))
|
391 |
+
|
392 |
+
for _ in loop_cloth:
|
393 |
+
|
394 |
+
optimizer_cloth.zero_grad()
|
395 |
+
|
396 |
+
deformed_verts, stiffness, rigid = local_affine_model(
|
397 |
+
verts_refine.to(device), return_stiff=True)
|
398 |
+
mesh_pr = mesh_pr.update_padded(deformed_verts)
|
399 |
+
|
400 |
+
# losses for laplacian, edge, normal consistency
|
401 |
+
update_mesh_shape_prior_losses(mesh_pr, losses)
|
402 |
+
|
403 |
+
in_tensor["P_normal_F"], in_tensor["P_normal_B"] = dataset.render_normal(
|
404 |
+
mesh_pr.verts_padded(), mesh_pr.faces_padded())
|
405 |
+
|
406 |
+
diff_F_cloth = torch.abs(
|
407 |
+
in_tensor["P_normal_F"] - in_tensor["normal_F"])
|
408 |
+
diff_B_cloth = torch.abs(
|
409 |
+
in_tensor["P_normal_B"] - in_tensor["normal_B"])
|
410 |
+
|
411 |
+
losses["cloth"]["value"] = (diff_F_cloth + diff_B_cloth).mean()
|
412 |
+
losses["stiffness"]["value"] = torch.mean(stiffness)
|
413 |
+
losses["rigid"]["value"] = torch.mean(rigid)
|
414 |
+
|
415 |
+
# Weighted sum of the losses
|
416 |
+
cloth_loss = torch.tensor(0.0, requires_grad=True).to(device)
|
417 |
+
pbar_desc = "Cloth Refinement --- "
|
418 |
+
|
419 |
+
for k in losses.keys():
|
420 |
+
if k not in ["normal", "silhouette"] and losses[k]["weight"] > 0.0:
|
421 |
+
cloth_loss = cloth_loss + \
|
422 |
+
losses[k]["value"] * losses[k]["weight"]
|
423 |
+
pbar_desc += f"{k}:{losses[k]['value']* losses[k]['weight']:.5f} | "
|
424 |
+
|
425 |
+
pbar_desc += f"Total: {cloth_loss:.5f}"
|
426 |
+
loop_cloth.set_description(pbar_desc)
|
427 |
+
|
428 |
+
# update params
|
429 |
+
cloth_loss.backward()
|
430 |
+
optimizer_cloth.step()
|
431 |
+
scheduler_cloth.step(cloth_loss)
|
432 |
+
|
433 |
+
final = trimesh.Trimesh(
|
434 |
+
mesh_pr.verts_packed().detach().squeeze(0).cpu(),
|
435 |
+
mesh_pr.faces_packed().detach().squeeze(0).cpu(),
|
436 |
+
process=False, maintains_order=True
|
437 |
+
)
|
438 |
+
|
439 |
+
# only with front texture
|
440 |
+
tex_colors = query_color(
|
441 |
+
mesh_pr.verts_packed().detach().squeeze(0).cpu(),
|
442 |
+
mesh_pr.faces_packed().detach().squeeze(0).cpu(),
|
443 |
+
in_tensor["image"],
|
444 |
+
device=device,
|
445 |
+
)
|
446 |
+
|
447 |
+
# full normal textures
|
448 |
+
norm_colors = (mesh_pr.verts_normals_padded().squeeze(
|
449 |
+
0).detach().cpu() + 1.0) * 0.5 * 255.0
|
450 |
+
|
451 |
+
final.visual.vertex_colors = tex_colors
|
452 |
+
final.export(
|
453 |
+
f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_refine.obj")
|
454 |
+
|
455 |
+
final.visual.vertex_colors = norm_colors
|
456 |
+
final.export(
|
457 |
+
f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_refine.glb")
|
458 |
+
|
459 |
+
# always export the visualized video regardless of the cloth refinement
|
460 |
+
verts_lst = [smpl_obj.vertices, final.vertices]
|
461 |
+
faces_lst = [smpl_obj.faces, final.faces]
|
462 |
+
|
463 |
+
# self-rotated video
|
464 |
+
dataset.render.load_meshes(
|
465 |
+
verts_lst, faces_lst)
|
466 |
+
dataset.render.get_rendered_video(
|
467 |
+
[data["ori_image"], rgb_norm_F, rgb_norm_B],
|
468 |
+
os.path.join(config_dict['out_dir'], cfg.name,
|
469 |
+
f"vid/{data['name']}_cloth.mp4"),
|
470 |
+
)
|
471 |
+
|
472 |
+
smpl_obj_path = f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_smpl.obj"
|
473 |
+
smpl_glb_path = f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_smpl.glb"
|
474 |
+
smpl_npy_path = f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_smpl.npy"
|
475 |
+
refine_obj_path = f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_refine.obj"
|
476 |
+
refine_glb_path = f"{config_dict['out_dir']}/{cfg.name}/obj/{data['name']}_refine.glb"
|
477 |
+
|
478 |
+
video_path = os.path.join(
|
479 |
+
config_dict['out_dir'], cfg.name, f"vid/{data['name']}_cloth.mp4")
|
480 |
+
overlap_path = os.path.join(
|
481 |
+
config_dict['out_dir'], cfg.name, f"png/{data['name']}_overlap.png")
|
482 |
+
|
483 |
+
# clean all the variables
|
484 |
+
for element in dir():
|
485 |
+
if 'path' not in element:
|
486 |
+
del locals()[element]
|
487 |
+
gc.collect()
|
488 |
+
torch.cuda.empty_cache()
|
489 |
+
|
490 |
+
return [smpl_glb_path, smpl_obj_path,smpl_npy_path,
|
491 |
+
refine_glb_path, refine_obj_path,
|
492 |
+
video_path, video_path, overlap_path]
|
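The block above ends by returning eight file paths: the fitted SMPL body (GLB/OBJ/NPY), the refined clothed mesh (GLB/OBJ), the self-rotation video (listed twice) and the normal-overlap PNG. A minimal sketch of how such a return value could be wired to Gradio output components follows; the function name generate_model and the exact component choices are assumptions, not the Space's actual app.py wiring:

    import gradio as gr

    # generate_model(in_path, model_type) is assumed to be the function whose tail is shown above;
    # it returns the eight paths in the order of the return statement.
    demo = gr.Interface(
        fn=generate_model,
        inputs=[
            gr.Image(type="filepath", label="Input image"),
            gr.Radio(["icon-filter", "icon-nofilter", "pifu", "pamir"], label="model_type"),
        ],
        outputs=[
            gr.Model3D(label="SMPL body (.glb)"),
            gr.File(label="SMPL body (.obj)"),
            gr.File(label="SMPL params (.npy)"),
            gr.Model3D(label="Refined clothed mesh (.glb)"),
            gr.File(label="Refined clothed mesh (.obj)"),
            gr.Video(label="Self-rotation video"),
            gr.Video(label="Self-rotation video (download)"),
            gr.Image(label="Image + predicted normal overlay"),
        ],
    )
    demo.launch()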
configs/icon-filter.yaml
ADDED
@@ -0,0 +1,25 @@
1 |
+
name: icon-filter
|
2 |
+
ckpt_dir: "./data/ckpt/"
|
3 |
+
resume_path: "https://huggingface.co/Yuliang/ICON/resolve/main/icon-filter.ckpt"
|
4 |
+
normal_path: "https://huggingface.co/Yuliang/ICON/resolve/main/normal.ckpt"
|
5 |
+
|
6 |
+
test_mode: True
|
7 |
+
batch_size: 1
|
8 |
+
|
9 |
+
net:
|
10 |
+
mlp_dim: [256, 512, 256, 128, 1]
|
11 |
+
res_layers: [2,3,4]
|
12 |
+
num_stack: 2
|
13 |
+
prior_type: "icon" # icon/pamir/icon
|
14 |
+
use_filter: True
|
15 |
+
in_geo: (('normal_F',3), ('normal_B',3))
|
16 |
+
in_nml: (('image',3), ('T_normal_F',3), ('T_normal_B',3))
|
17 |
+
smpl_feats: ['sdf', 'norm', 'vis', 'cmap']
|
18 |
+
gtype: 'HGPIFuNet'
|
19 |
+
norm_mlp: 'batch'
|
20 |
+
hourglass_dim: 6
|
21 |
+
smpl_dim: 7
|
22 |
+
|
23 |
+
# user defined
|
24 |
+
mcube_res: 512 # occupancy field resolution, higher --> more details
|
25 |
+
clean_mesh: False # if True, will remove floating pieces
|
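Each of these YAML files is one of the ./configs/{model_type}.yaml choices consumed by the inference code above. A small sketch of the merge pattern, assuming cfg is the shared yacs CfgNode exposed by lib.common.config (the module itself is not shown in this diff):

    from lib.common.config import cfg  # shared yacs CfgNode, assumed import path

    cfg.merge_from_file("./configs/icon-filter.yaml")
    cfg.merge_from_file("./lib/pymaf/configs/pymaf_config.yaml")
    # key/value overrides, e.g. a coarser marching-cubes grid for a quick preview
    cfg.merge_from_list(["mcube_res", 256, "clean_mesh", True])
    cfg.freeze()

    print(cfg.name, cfg.net.prior_type, cfg.mcube_res)

Once freeze() has been called, any further mutation of the node raises, which is why the inference code merges every override before freezing.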
configs/icon-nofilter.yaml
ADDED
@@ -0,0 +1,25 @@
1 |
+
name: icon-nofilter
|
2 |
+
ckpt_dir: "./data/ckpt/"
|
3 |
+
resume_path: "https://huggingface.co/Yuliang/ICON/resolve/main/icon-nofilter.ckpt"
|
4 |
+
normal_path: "https://huggingface.co/Yuliang/ICON/resolve/main/normal.ckpt"
|
5 |
+
|
6 |
+
test_mode: True
|
7 |
+
batch_size: 1
|
8 |
+
|
9 |
+
net:
|
10 |
+
mlp_dim: [256, 512, 256, 128, 1]
|
11 |
+
res_layers: [2,3,4]
|
12 |
+
num_stack: 2
|
13 |
+
prior_type: "icon" # icon/pamir/icon
|
14 |
+
use_filter: False
|
15 |
+
in_geo: (('normal_F',3), ('normal_B',3))
|
16 |
+
in_nml: (('image',3), ('T_normal_F',3), ('T_normal_B',3))
|
17 |
+
smpl_feats: ['sdf', 'norm', 'vis', 'cmap']
|
18 |
+
gtype: 'HGPIFuNet'
|
19 |
+
norm_mlp: 'batch'
|
20 |
+
hourglass_dim: 6
|
21 |
+
smpl_dim: 7
|
22 |
+
|
23 |
+
# user defined
|
24 |
+
mcube_res: 512 # occupancy field resolution, higher --> more details
|
25 |
+
clean_mesh: False # if True, will remove floating pieces
|
configs/pamir.yaml
ADDED
@@ -0,0 +1,24 @@
1 |
+
name: pamir
|
2 |
+
ckpt_dir: "./data/ckpt/"
|
3 |
+
resume_path: "https://huggingface.co/Yuliang/ICON/resolve/main/pamir.ckpt"
|
4 |
+
normal_path: "https://huggingface.co/Yuliang/ICON/resolve/main/normal.ckpt"
|
5 |
+
|
6 |
+
test_mode: True
|
7 |
+
batch_size: 1
|
8 |
+
|
9 |
+
net:
|
10 |
+
mlp_dim: [256, 512, 256, 128, 1]
|
11 |
+
res_layers: [2,3,4]
|
12 |
+
num_stack: 2
|
13 |
+
prior_type: "pamir" # icon/pamir/icon
|
14 |
+
use_filter: True
|
15 |
+
in_geo: (('image',3), ('normal_F',3), ('normal_B',3))
|
16 |
+
in_nml: (('image',3), ('T_normal_F',3), ('T_normal_B',3))
|
17 |
+
gtype: 'HGPIFuNet'
|
18 |
+
norm_mlp: 'batch'
|
19 |
+
hourglass_dim: 6
|
20 |
+
voxel_dim: 7
|
21 |
+
|
22 |
+
# user defined
|
23 |
+
mcube_res: 512 # occupancy field resolution, higher --> more details
|
24 |
+
clean_mesh: False # if True, will remove floating pieces
|
configs/pifu.yaml
ADDED
@@ -0,0 +1,24 @@
1 |
+
name: pifu
|
2 |
+
ckpt_dir: "./data/ckpt/"
|
3 |
+
resume_path: "https://huggingface.co/Yuliang/ICON/resolve/main/pifu.ckpt"
|
4 |
+
normal_path: "https://huggingface.co/Yuliang/ICON/resolve/main/normal.ckpt"
|
5 |
+
|
6 |
+
test_mode: True
|
7 |
+
batch_size: 1
|
8 |
+
|
9 |
+
net:
|
10 |
+
mlp_dim: [256, 512, 256, 128, 1]
|
11 |
+
res_layers: [2,3,4]
|
12 |
+
num_stack: 2
|
13 |
+
prior_type: "pifu" # icon/pamir/icon
|
14 |
+
use_filter: True
|
15 |
+
in_geo: (('image',3), ('normal_F',3), ('normal_B',3))
|
16 |
+
in_nml: (('image',3), ('T_normal_F',3), ('T_normal_B',3))
|
17 |
+
gtype: 'HGPIFuNet'
|
18 |
+
norm_mlp: 'batch'
|
19 |
+
hourglass_dim: 12
|
20 |
+
|
21 |
+
|
22 |
+
# user defined
|
23 |
+
mcube_res: 512 # occupancy field resolution, higher --> more details
|
24 |
+
clean_mesh: False # if True, will remove floating pieces
|
lib/pymaf/configs/pymaf_config.yaml
ADDED
@@ -0,0 +1,47 @@
1 |
+
SOLVER:
|
2 |
+
MAX_ITER: 500000
|
3 |
+
TYPE: Adam
|
4 |
+
BASE_LR: 0.00005
|
5 |
+
GAMMA: 0.1
|
6 |
+
STEPS: [0]
|
7 |
+
EPOCHS: [0]
|
8 |
+
DEBUG: False
|
9 |
+
LOGDIR: ''
|
10 |
+
DEVICE: cuda
|
11 |
+
NUM_WORKERS: 8
|
12 |
+
SEED_VALUE: -1
|
13 |
+
LOSS:
|
14 |
+
KP_2D_W: 300.0
|
15 |
+
KP_3D_W: 300.0
|
16 |
+
SHAPE_W: 0.06
|
17 |
+
POSE_W: 60.0
|
18 |
+
VERT_W: 0.0
|
19 |
+
INDEX_WEIGHTS: 2.0
|
20 |
+
# Loss weights for surface parts. (24 Parts)
|
21 |
+
PART_WEIGHTS: 0.3
|
22 |
+
# Loss weights for UV regression.
|
23 |
+
POINT_REGRESSION_WEIGHTS: 0.5
|
24 |
+
TRAIN:
|
25 |
+
NUM_WORKERS: 8
|
26 |
+
BATCH_SIZE: 64
|
27 |
+
PIN_MEMORY: True
|
28 |
+
TEST:
|
29 |
+
BATCH_SIZE: 32
|
30 |
+
MODEL:
|
31 |
+
PyMAF:
|
32 |
+
BACKBONE: 'res50'
|
33 |
+
MLP_DIM: [256, 128, 64, 5]
|
34 |
+
N_ITER: 3
|
35 |
+
AUX_SUPV_ON: True
|
36 |
+
DP_HEATMAP_SIZE: 56
|
37 |
+
RES_MODEL:
|
38 |
+
DECONV_WITH_BIAS: False
|
39 |
+
NUM_DECONV_LAYERS: 3
|
40 |
+
NUM_DECONV_FILTERS:
|
41 |
+
- 256
|
42 |
+
- 256
|
43 |
+
- 256
|
44 |
+
NUM_DECONV_KERNELS:
|
45 |
+
- 4
|
46 |
+
- 4
|
47 |
+
- 4
|
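MODEL.PyMAF.MLP_DIM and N_ITER above fix the feature sizes that the PyMAF regressors (defined later in lib/pymaf/models/pymaf_net.py) expect. A quick bookkeeping sketch; the 21x21 sampling grid and the 431-vertex downsampled SMPL are taken from pymaf_net.py and the MAF extractor, the other values come from this YAML:

    mlp_out_dim = 5                      # MODEL.PyMAF.MLP_DIM[-1]
    grid_size = 21                       # points_grid in pymaf_net.py
    n_smpl_ds_verts = 431                # downsampled SMPL vertices used by the MAF extractor

    grid_feat_len = grid_size * grid_size * mlp_out_dim   # 2205: input dim of the first Regressor
    ma_feat_len = n_smpl_ds_verts * mlp_out_dim           # 2155: input dim of the later Regressors
    print(grid_feat_len, ma_feat_len)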
lib/pymaf/core/__init__.py
ADDED
File without changes
|
lib/pymaf/core/train_options.py
ADDED
@@ -0,0 +1,135 @@
1 |
+
import argparse
|
2 |
+
|
3 |
+
|
4 |
+
class TrainOptions():
|
5 |
+
def __init__(self):
|
6 |
+
self.parser = argparse.ArgumentParser()
|
7 |
+
|
8 |
+
gen = self.parser.add_argument_group('General')
|
9 |
+
gen.add_argument(
|
10 |
+
'--resume',
|
11 |
+
dest='resume',
|
12 |
+
default=False,
|
13 |
+
action='store_true',
|
14 |
+
help='Resume from checkpoint (use latest checkpoint by default)')
|
15 |
+
|
16 |
+
io = self.parser.add_argument_group('io')
|
17 |
+
io.add_argument('--log_dir',
|
18 |
+
default='logs',
|
19 |
+
help='Directory to store logs')
|
20 |
+
io.add_argument(
|
21 |
+
'--pretrained_checkpoint',
|
22 |
+
default=None,
|
23 |
+
help='Load a pretrained checkpoint at the beginning of training')
|
24 |
+
|
25 |
+
train = self.parser.add_argument_group('Training Options')
|
26 |
+
train.add_argument('--num_epochs',
|
27 |
+
type=int,
|
28 |
+
default=200,
|
29 |
+
help='Total number of training epochs')
|
30 |
+
train.add_argument('--regressor',
|
31 |
+
type=str,
|
32 |
+
choices=['hmr', 'pymaf_net'],
|
33 |
+
default='pymaf_net',
|
34 |
+
help='Name of the SMPL regressor.')
|
35 |
+
train.add_argument('--cfg_file',
|
36 |
+
type=str,
|
37 |
+
default='./configs/pymaf_config.yaml',
|
38 |
+
help='config file path for PyMAF.')
|
39 |
+
train.add_argument(
|
40 |
+
'--img_res',
|
41 |
+
type=int,
|
42 |
+
default=224,
|
43 |
+
help='Rescale bounding boxes to size [img_res, img_res] before feeding them in the network'
|
44 |
+
)
|
45 |
+
train.add_argument(
|
46 |
+
'--rot_factor',
|
47 |
+
type=float,
|
48 |
+
default=30,
|
49 |
+
help='Random rotation in the range [-rot_factor, rot_factor]')
|
50 |
+
train.add_argument(
|
51 |
+
'--noise_factor',
|
52 |
+
type=float,
|
53 |
+
default=0.4,
|
54 |
+
help='Randomly multiply pixel values with factor in the range [1-noise_factor, 1+noise_factor]'
|
55 |
+
)
|
56 |
+
train.add_argument(
|
57 |
+
'--scale_factor',
|
58 |
+
type=float,
|
59 |
+
default=0.25,
|
60 |
+
help='Rescale bounding boxes by a factor of [1-scale_factor,1+scale_factor]'
|
61 |
+
)
|
62 |
+
train.add_argument(
|
63 |
+
'--openpose_train_weight',
|
64 |
+
default=0.,
|
65 |
+
help='Weight for OpenPose keypoints during training')
|
66 |
+
train.add_argument('--gt_train_weight',
|
67 |
+
default=1.,
|
68 |
+
help='Weight for GT keypoints during training')
|
69 |
+
train.add_argument('--eval_dataset',
|
70 |
+
type=str,
|
71 |
+
default='h36m-p2-mosh',
|
72 |
+
help='Name of the evaluation dataset.')
|
73 |
+
train.add_argument('--single_dataset',
|
74 |
+
default=False,
|
75 |
+
action='store_true',
|
76 |
+
help='Use a single dataset')
|
77 |
+
train.add_argument('--single_dataname',
|
78 |
+
type=str,
|
79 |
+
default='h36m',
|
80 |
+
help='Name of the single dataset.')
|
81 |
+
train.add_argument('--eval_pve',
|
82 |
+
default=False,
|
83 |
+
action='store_true',
|
84 |
+
help='evaluate PVE')
|
85 |
+
train.add_argument('--overwrite',
|
86 |
+
default=False,
|
87 |
+
action='store_true',
|
88 |
+
help='overwrite the latest checkpoint')
|
89 |
+
|
90 |
+
train.add_argument('--distributed',
|
91 |
+
action='store_true',
|
92 |
+
help='Use distributed training')
|
93 |
+
train.add_argument('--dist_backend',
|
94 |
+
default='nccl',
|
95 |
+
type=str,
|
96 |
+
help='distributed backend')
|
97 |
+
train.add_argument('--dist_url',
|
98 |
+
default='tcp://127.0.0.1:10356',
|
99 |
+
type=str,
|
100 |
+
help='url used to set up distributed training')
|
101 |
+
train.add_argument('--world_size',
|
102 |
+
default=1,
|
103 |
+
type=int,
|
104 |
+
help='number of nodes for distributed training')
|
105 |
+
train.add_argument("--local_rank", default=0, type=int)
|
106 |
+
train.add_argument('--rank',
|
107 |
+
default=0,
|
108 |
+
type=int,
|
109 |
+
help='node rank for distributed training')
|
110 |
+
train.add_argument(
|
111 |
+
'--multiprocessing_distributed',
|
112 |
+
action='store_true',
|
113 |
+
help='Use multi-processing distributed training to launch '
|
114 |
+
'N processes per node, which has N GPUs. This is the '
|
115 |
+
'fastest way to use PyTorch for either single node or '
|
116 |
+
'multi node data parallel training')
|
117 |
+
|
118 |
+
misc = self.parser.add_argument_group('Misc Options')
|
119 |
+
misc.add_argument('--misc',
|
120 |
+
help="Modify config options using the command-line",
|
121 |
+
default=None,
|
122 |
+
nargs=argparse.REMAINDER)
|
123 |
+
return
|
124 |
+
|
125 |
+
def parse_args(self):
|
126 |
+
"""Parse input arguments."""
|
127 |
+
self.args = self.parser.parse_args()
|
128 |
+
self.save_dump()
|
129 |
+
return self.args
|
130 |
+
|
131 |
+
def save_dump(self):
|
132 |
+
"""Store all argument values to a json file.
|
133 |
+
The default location is logs/expname/args.json.
|
134 |
+
"""
|
135 |
+
pass
|
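A minimal usage sketch for TrainOptions; the import path assumes the file lands at lib/pymaf/core/train_options.py as in this commit:

    from lib.pymaf.core.train_options import TrainOptions

    if __name__ == "__main__":
        # e.g. `python train.py --regressor pymaf_net --num_epochs 100`
        options = TrainOptions().parse_args()
        print(options.regressor, options.cfg_file, options.num_epochs)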
lib/pymaf/core/base_trainer.py
ADDED
@@ -0,0 +1,107 @@
1 |
+
# This script is borrowed and extended from https://github.com/nkolot/SPIN/blob/master/utils/base_trainer.py
|
2 |
+
from __future__ import division
|
3 |
+
import logging
|
4 |
+
from utils import CheckpointSaver
|
5 |
+
from tensorboardX import SummaryWriter
|
6 |
+
|
7 |
+
import torch
|
8 |
+
from tqdm import tqdm
|
9 |
+
|
10 |
+
tqdm.monitor_interval = 0
|
11 |
+
|
12 |
+
|
13 |
+
logger = logging.getLogger(__name__)
|
14 |
+
|
15 |
+
|
16 |
+
class BaseTrainer(object):
|
17 |
+
"""Base class for Trainer objects.
|
18 |
+
Takes care of checkpointing/logging/resuming training.
|
19 |
+
"""
|
20 |
+
|
21 |
+
def __init__(self, options):
|
22 |
+
self.options = options
|
23 |
+
if options.multiprocessing_distributed:
|
24 |
+
self.device = torch.device('cuda', options.gpu)
|
25 |
+
else:
|
26 |
+
self.device = torch.device(
|
27 |
+
'cuda' if torch.cuda.is_available() else 'cpu')
|
28 |
+
# override this function to define your model, optimizers etc.
|
29 |
+
self.saver = CheckpointSaver(save_dir=options.checkpoint_dir,
|
30 |
+
overwrite=options.overwrite)
|
31 |
+
if options.rank == 0:
|
32 |
+
self.summary_writer = SummaryWriter(self.options.summary_dir)
|
33 |
+
self.init_fn()
|
34 |
+
|
35 |
+
self.checkpoint = None
|
36 |
+
if options.resume and self.saver.exists_checkpoint():
|
37 |
+
self.checkpoint = self.saver.load_checkpoint(
|
38 |
+
self.models_dict, self.optimizers_dict)
|
39 |
+
|
40 |
+
if self.checkpoint is None:
|
41 |
+
self.epoch_count = 0
|
42 |
+
self.step_count = 0
|
43 |
+
else:
|
44 |
+
self.epoch_count = self.checkpoint['epoch']
|
45 |
+
self.step_count = self.checkpoint['total_step_count']
|
46 |
+
|
47 |
+
if self.checkpoint is not None:
|
48 |
+
self.checkpoint_batch_idx = self.checkpoint['batch_idx']
|
49 |
+
else:
|
50 |
+
self.checkpoint_batch_idx = 0
|
51 |
+
|
52 |
+
self.best_performance = float('inf')
|
53 |
+
|
54 |
+
def load_pretrained(self, checkpoint_file=None):
|
55 |
+
"""Load a pretrained checkpoint.
|
56 |
+
This is different from resuming training using --resume.
|
57 |
+
"""
|
58 |
+
if checkpoint_file is not None:
|
59 |
+
checkpoint = torch.load(checkpoint_file)
|
60 |
+
for model in self.models_dict:
|
61 |
+
if model in checkpoint:
|
62 |
+
self.models_dict[model].load_state_dict(checkpoint[model],
|
63 |
+
strict=True)
|
64 |
+
print(f'Checkpoint {model} loaded')
|
65 |
+
|
66 |
+
def move_dict_to_device(self, dict, device, tensor2float=False):
|
67 |
+
for k, v in dict.items():
|
68 |
+
if isinstance(v, torch.Tensor):
|
69 |
+
if tensor2float:
|
70 |
+
dict[k] = v.float().to(device)
|
71 |
+
else:
|
72 |
+
dict[k] = v.to(device)
|
73 |
+
|
74 |
+
# The following methods (with the possible exception of test) have to be implemented in the derived classes
|
75 |
+
def train(self, epoch):
|
76 |
+
raise NotImplementedError('You need to provide a train method')
|
77 |
+
|
78 |
+
def init_fn(self):
|
79 |
+
raise NotImplementedError('You need to provide an _init_fn method')
|
80 |
+
|
81 |
+
def train_step(self, input_batch):
|
82 |
+
raise NotImplementedError('You need to provide a _train_step method')
|
83 |
+
|
84 |
+
def train_summaries(self, input_batch):
|
85 |
+
raise NotImplementedError(
|
86 |
+
'You need to provide a _train_summaries method')
|
87 |
+
|
88 |
+
def visualize(self, input_batch):
|
89 |
+
raise NotImplementedError('You need to provide a visualize method')
|
90 |
+
|
91 |
+
def validate(self):
|
92 |
+
pass
|
93 |
+
|
94 |
+
def test(self):
|
95 |
+
pass
|
96 |
+
|
97 |
+
def evaluate(self):
|
98 |
+
pass
|
99 |
+
|
100 |
+
def fit(self):
|
101 |
+
# Run training for num_epochs epochs
|
102 |
+
for epoch in tqdm(range(self.epoch_count, self.options.num_epochs),
|
103 |
+
total=self.options.num_epochs,
|
104 |
+
initial=self.epoch_count):
|
105 |
+
self.epoch_count = epoch
|
106 |
+
self.train(epoch)
|
107 |
+
return
|
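BaseTrainer calls init_fn() from its constructor and expects it to populate models_dict and optimizers_dict so that checkpoints can be saved and resumed; train(epoch) is then driven by fit(). A structural sketch of a derived trainer follows. ToyTrainer is illustrative only, it does not exist in this repo, and constructing it still requires an options object providing checkpoint_dir, summary_dir, rank and so on:

    import torch
    from lib.pymaf.core.base_trainer import BaseTrainer

    class ToyTrainer(BaseTrainer):
        def init_fn(self):
            # called inside BaseTrainer.__init__; register everything the saver should track
            self.model = torch.nn.Linear(10, 1).to(self.device)
            self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
            self.models_dict = {"model": self.model}
            self.optimizers_dict = {"optimizer": self.optimizer}

        def train(self, epoch):
            # one epoch of (dummy) optimisation
            x = torch.randn(32, 10, device=self.device)
            loss = self.model(x).pow(2).mean()
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.step_count += 1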
lib/pymaf/core/cfgs.py
ADDED
@@ -0,0 +1,100 @@
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
+
# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
|
4 |
+
# holder of all proprietary rights on this computer program.
|
5 |
+
# You can only use this computer program if you have closed
|
6 |
+
# a license agreement with MPG or you get the right to use the computer
|
7 |
+
# program from someone who is authorized to grant you that right.
|
8 |
+
# Any use of the computer program without a valid license is prohibited and
|
9 |
+
# liable to prosecution.
|
10 |
+
#
|
11 |
+
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
|
12 |
+
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
|
13 |
+
# for Intelligent Systems. All rights reserved.
|
14 |
+
#
|
15 |
+
# Contact: [email protected]
|
16 |
+
|
17 |
+
import os
|
18 |
+
import json
|
19 |
+
from yacs.config import CfgNode as CN
|
20 |
+
|
21 |
+
# Configuration variables
|
22 |
+
cfg = CN(new_allowed=True)
|
23 |
+
|
24 |
+
cfg.OUTPUT_DIR = 'results'
|
25 |
+
cfg.DEVICE = 'cuda'
|
26 |
+
cfg.DEBUG = False
|
27 |
+
cfg.LOGDIR = ''
|
28 |
+
cfg.VAL_VIS_BATCH_FREQ = 200
|
29 |
+
cfg.TRAIN_VIS_ITER_FERQ = 1000
|
30 |
+
cfg.SEED_VALUE = -1
|
31 |
+
|
32 |
+
cfg.TRAIN = CN(new_allowed=True)
|
33 |
+
|
34 |
+
cfg.LOSS = CN(new_allowed=True)
|
35 |
+
cfg.LOSS.KP_2D_W = 300.0
|
36 |
+
cfg.LOSS.KP_3D_W = 300.0
|
37 |
+
cfg.LOSS.SHAPE_W = 0.06
|
38 |
+
cfg.LOSS.POSE_W = 60.0
|
39 |
+
cfg.LOSS.VERT_W = 0.0
|
40 |
+
|
41 |
+
# Loss weights for dense correspondences
|
42 |
+
cfg.LOSS.INDEX_WEIGHTS = 2.0
|
43 |
+
# Loss weights for surface parts. (24 Parts)
|
44 |
+
cfg.LOSS.PART_WEIGHTS = 0.3
|
45 |
+
# Loss weights for UV regression.
|
46 |
+
cfg.LOSS.POINT_REGRESSION_WEIGHTS = 0.5
|
47 |
+
|
48 |
+
cfg.MODEL = CN(new_allowed=True)
|
49 |
+
|
50 |
+
cfg.MODEL.PyMAF = CN(new_allowed=True)
|
51 |
+
|
52 |
+
# switch
|
53 |
+
cfg.TRAIN.VAL_LOOP = True
|
54 |
+
|
55 |
+
cfg.TEST = CN(new_allowed=True)
|
56 |
+
|
57 |
+
|
58 |
+
def get_cfg_defaults():
|
59 |
+
"""Get a yacs CfgNode object with default values for my_project."""
|
60 |
+
# Return a clone so that the defaults will not be altered
|
61 |
+
# This is for the "local variable" use pattern
|
62 |
+
# return cfg.clone()
|
63 |
+
return cfg
|
64 |
+
|
65 |
+
|
66 |
+
def update_cfg(cfg_file):
|
67 |
+
# cfg = get_cfg_defaults()
|
68 |
+
cfg.merge_from_file(cfg_file)
|
69 |
+
# return cfg.clone()
|
70 |
+
return cfg
|
71 |
+
|
72 |
+
|
73 |
+
def parse_args(args):
|
74 |
+
cfg_file = args.cfg_file
|
75 |
+
if args.cfg_file is not None:
|
76 |
+
cfg = update_cfg(args.cfg_file)
|
77 |
+
else:
|
78 |
+
cfg = get_cfg_defaults()
|
79 |
+
|
80 |
+
# if args.misc is not None:
|
81 |
+
# cfg.merge_from_list(args.misc)
|
82 |
+
|
83 |
+
return cfg
|
84 |
+
|
85 |
+
|
86 |
+
def parse_args_extend(args):
|
87 |
+
if args.resume:
|
88 |
+
if not os.path.exists(args.log_dir):
|
89 |
+
raise ValueError(
|
90 |
+
'Experiment is set to resume mode, but the log directory does not exist.'
|
91 |
+
)
|
92 |
+
|
93 |
+
# load log's cfg
|
94 |
+
cfg_file = os.path.join(args.log_dir, 'cfg.yaml')
|
95 |
+
cfg = update_cfg(cfg_file)
|
96 |
+
|
97 |
+
if args.misc is not None:
|
98 |
+
cfg.merge_from_list(args.misc)
|
99 |
+
else:
|
100 |
+
parse_args(args)
|
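The module-level cfg above is shared, mutable state: update_cfg() merges a YAML file into it in place and returns the same node. A small sketch, with argparse.Namespace standing in for the parsed TrainOptions result:

    from argparse import Namespace
    from lib.pymaf.core.cfgs import parse_args, cfg

    args = Namespace(cfg_file="lib/pymaf/configs/pymaf_config.yaml", misc=None)
    parse_args(args)

    # the merged values are now visible through the shared `cfg` node
    print(cfg.MODEL.PyMAF.BACKBONE, cfg.MODEL.PyMAF.N_ITER, cfg.SOLVER.BASE_LR)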
lib/pymaf/core/constants.py
ADDED
@@ -0,0 +1,153 @@
1 |
+
# This script is borrowed and extended from https://github.com/nkolot/SPIN/blob/master/constants.py
|
2 |
+
FOCAL_LENGTH = 5000.
|
3 |
+
IMG_RES = 224
|
4 |
+
|
5 |
+
# Mean and standard deviation for normalizing input image
|
6 |
+
IMG_NORM_MEAN = [0.485, 0.456, 0.406]
|
7 |
+
IMG_NORM_STD = [0.229, 0.224, 0.225]
|
8 |
+
"""
|
9 |
+
We create a superset of joints containing the OpenPose joints together with the ones that each dataset provides.
|
10 |
+
We keep a superset of 24 joints such that we include all joints from every dataset.
|
11 |
+
If a dataset doesn't provide annotations for a specific joint, we simply ignore it.
|
12 |
+
The joints used here are the following:
|
13 |
+
"""
|
14 |
+
JOINT_NAMES = [
|
15 |
+
# 25 OpenPose joints (in the order provided by OpenPose)
|
16 |
+
'OP Nose',
|
17 |
+
'OP Neck',
|
18 |
+
'OP RShoulder',
|
19 |
+
'OP RElbow',
|
20 |
+
'OP RWrist',
|
21 |
+
'OP LShoulder',
|
22 |
+
'OP LElbow',
|
23 |
+
'OP LWrist',
|
24 |
+
'OP MidHip',
|
25 |
+
'OP RHip',
|
26 |
+
'OP RKnee',
|
27 |
+
'OP RAnkle',
|
28 |
+
'OP LHip',
|
29 |
+
'OP LKnee',
|
30 |
+
'OP LAnkle',
|
31 |
+
'OP REye',
|
32 |
+
'OP LEye',
|
33 |
+
'OP REar',
|
34 |
+
'OP LEar',
|
35 |
+
'OP LBigToe',
|
36 |
+
'OP LSmallToe',
|
37 |
+
'OP LHeel',
|
38 |
+
'OP RBigToe',
|
39 |
+
'OP RSmallToe',
|
40 |
+
'OP RHeel',
|
41 |
+
# 24 Ground Truth joints (superset of joints from different datasets)
|
42 |
+
'Right Ankle',
|
43 |
+
'Right Knee',
|
44 |
+
'Right Hip', # 2
|
45 |
+
'Left Hip',
|
46 |
+
'Left Knee', # 4
|
47 |
+
'Left Ankle',
|
48 |
+
'Right Wrist', # 6
|
49 |
+
'Right Elbow',
|
50 |
+
'Right Shoulder', # 8
|
51 |
+
'Left Shoulder',
|
52 |
+
'Left Elbow', # 10
|
53 |
+
'Left Wrist',
|
54 |
+
'Neck (LSP)', # 12
|
55 |
+
'Top of Head (LSP)',
|
56 |
+
'Pelvis (MPII)', # 14
|
57 |
+
'Thorax (MPII)',
|
58 |
+
'Spine (H36M)', # 16
|
59 |
+
'Jaw (H36M)',
|
60 |
+
'Head (H36M)', # 18
|
61 |
+
'Nose',
|
62 |
+
'Left Eye',
|
63 |
+
'Right Eye',
|
64 |
+
'Left Ear',
|
65 |
+
'Right Ear'
|
66 |
+
]
|
67 |
+
|
68 |
+
# Dict containing the joints in numerical order
|
69 |
+
JOINT_IDS = {JOINT_NAMES[i]: i for i in range(len(JOINT_NAMES))}
|
70 |
+
|
71 |
+
# Map joints to SMPL joints
|
72 |
+
JOINT_MAP = {
|
73 |
+
'OP Nose': 24,
|
74 |
+
'OP Neck': 12,
|
75 |
+
'OP RShoulder': 17,
|
76 |
+
'OP RElbow': 19,
|
77 |
+
'OP RWrist': 21,
|
78 |
+
'OP LShoulder': 16,
|
79 |
+
'OP LElbow': 18,
|
80 |
+
'OP LWrist': 20,
|
81 |
+
'OP MidHip': 0,
|
82 |
+
'OP RHip': 2,
|
83 |
+
'OP RKnee': 5,
|
84 |
+
'OP RAnkle': 8,
|
85 |
+
'OP LHip': 1,
|
86 |
+
'OP LKnee': 4,
|
87 |
+
'OP LAnkle': 7,
|
88 |
+
'OP REye': 25,
|
89 |
+
'OP LEye': 26,
|
90 |
+
'OP REar': 27,
|
91 |
+
'OP LEar': 28,
|
92 |
+
'OP LBigToe': 29,
|
93 |
+
'OP LSmallToe': 30,
|
94 |
+
'OP LHeel': 31,
|
95 |
+
'OP RBigToe': 32,
|
96 |
+
'OP RSmallToe': 33,
|
97 |
+
'OP RHeel': 34,
|
98 |
+
'Right Ankle': 8,
|
99 |
+
'Right Knee': 5,
|
100 |
+
'Right Hip': 45,
|
101 |
+
'Left Hip': 46,
|
102 |
+
'Left Knee': 4,
|
103 |
+
'Left Ankle': 7,
|
104 |
+
'Right Wrist': 21,
|
105 |
+
'Right Elbow': 19,
|
106 |
+
'Right Shoulder': 17,
|
107 |
+
'Left Shoulder': 16,
|
108 |
+
'Left Elbow': 18,
|
109 |
+
'Left Wrist': 20,
|
110 |
+
'Neck (LSP)': 47,
|
111 |
+
'Top of Head (LSP)': 48,
|
112 |
+
'Pelvis (MPII)': 49,
|
113 |
+
'Thorax (MPII)': 50,
|
114 |
+
'Spine (H36M)': 51,
|
115 |
+
'Jaw (H36M)': 52,
|
116 |
+
'Head (H36M)': 53,
|
117 |
+
'Nose': 24,
|
118 |
+
'Left Eye': 26,
|
119 |
+
'Right Eye': 25,
|
120 |
+
'Left Ear': 28,
|
121 |
+
'Right Ear': 27
|
122 |
+
}
|
123 |
+
|
124 |
+
# Joint selectors
|
125 |
+
# Indices to get the 14 LSP joints from the 17 H36M joints
|
126 |
+
H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9]
|
127 |
+
H36M_TO_J14 = H36M_TO_J17[:14]
|
128 |
+
# Indices to get the 14 LSP joints from the ground truth joints
|
129 |
+
J24_TO_J17 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 16, 17]
|
130 |
+
J24_TO_J14 = J24_TO_J17[:14]
|
131 |
+
J24_TO_J19 = J24_TO_J17[:14] + [19, 20, 21, 22, 23]
|
132 |
+
J24_TO_JCOCO = [19, 20, 21, 22, 23, 9, 8, 10, 7, 11, 6, 3, 2, 4, 1, 5, 0]
|
133 |
+
|
134 |
+
# Permutation of SMPL pose parameters when flipping the shape
|
135 |
+
SMPL_JOINTS_FLIP_PERM = [
|
136 |
+
0, 2, 1, 3, 5, 4, 6, 8, 7, 9, 11, 10, 12, 14, 13, 15, 17, 16, 19, 18, 21,
|
137 |
+
20, 23, 22
|
138 |
+
]
|
139 |
+
SMPL_POSE_FLIP_PERM = []
|
140 |
+
for i in SMPL_JOINTS_FLIP_PERM:
|
141 |
+
SMPL_POSE_FLIP_PERM.append(3 * i)
|
142 |
+
SMPL_POSE_FLIP_PERM.append(3 * i + 1)
|
143 |
+
SMPL_POSE_FLIP_PERM.append(3 * i + 2)
|
144 |
+
# Permutation indices for the 24 ground truth joints
|
145 |
+
J24_FLIP_PERM = [
|
146 |
+
5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21,
|
147 |
+
20, 23, 22
|
148 |
+
]
|
149 |
+
# Permutation indices for the full set of 49 joints
|
150 |
+
J49_FLIP_PERM = [0, 1, 5, 6, 7, 2, 3, 4, 8, 12, 13, 14, 9, 10, 11, 16, 15, 18, 17, 22, 23, 24, 19, 20, 21]\
|
151 |
+
+ [25+i for i in J24_FLIP_PERM]
|
152 |
+
SMPL_J49_FLIP_PERM = [0, 1, 5, 6, 7, 2, 3, 4, 8, 12, 13, 14, 9, 10, 11, 16, 15, 18, 17, 22, 23, 24, 19, 20, 21]\
|
153 |
+
+ [25+i for i in SMPL_JOINTS_FLIP_PERM]
|
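A quick sanity sketch for the lookup tables above: JOINT_IDS maps a joint name to its row in the 49-joint superset (25 OpenPose joints plus 24 ground-truth joints), while JOINT_MAP gives the matching index in the SMPL joint set returned by the model.

    from lib.pymaf.core import constants

    assert len(constants.JOINT_NAMES) == 49
    name = "OP RWrist"
    print(constants.JOINT_IDS[name])   # 4  -> position in the 49-joint superset
    print(constants.JOINT_MAP[name])   # 21 -> corresponding SMPL joint index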
lib/pymaf/core/fits_dict.py
ADDED
@@ -0,0 +1,133 @@
1 |
+
'''
|
2 |
+
This script is borrowed and extended from https://github.com/nkolot/SPIN/blob/master/train/fits_dict.py
|
3 |
+
'''
|
4 |
+
import os
|
5 |
+
import cv2
|
6 |
+
import torch
|
7 |
+
import numpy as np
|
8 |
+
from torchgeometry import angle_axis_to_rotation_matrix, rotation_matrix_to_angle_axis
|
9 |
+
|
10 |
+
from core import path_config, constants
|
11 |
+
|
12 |
+
import logging
|
13 |
+
|
14 |
+
logger = logging.getLogger(__name__)
|
15 |
+
|
16 |
+
|
17 |
+
class FitsDict():
|
18 |
+
""" Dictionary keeping track of the best fit per image in the training set """
|
19 |
+
|
20 |
+
def __init__(self, options, train_dataset):
|
21 |
+
self.options = options
|
22 |
+
self.train_dataset = train_dataset
|
23 |
+
self.fits_dict = {}
|
24 |
+
self.valid_fit_state = {}
|
25 |
+
# array used to flip SMPL pose parameters
|
26 |
+
self.flipped_parts = torch.tensor(constants.SMPL_POSE_FLIP_PERM,
|
27 |
+
dtype=torch.int64)
|
28 |
+
# Load dictionary state
|
29 |
+
for ds_name, ds in train_dataset.dataset_dict.items():
|
30 |
+
if ds_name in ['h36m']:
|
31 |
+
dict_file = os.path.join(path_config.FINAL_FITS_DIR,
|
32 |
+
ds_name + '.npy')
|
33 |
+
self.fits_dict[ds_name] = torch.from_numpy(np.load(dict_file))
|
34 |
+
self.valid_fit_state[ds_name] = torch.ones(len(
|
35 |
+
self.fits_dict[ds_name]),
|
36 |
+
dtype=torch.uint8)
|
37 |
+
else:
|
38 |
+
dict_file = os.path.join(path_config.FINAL_FITS_DIR,
|
39 |
+
ds_name + '.npz')
|
40 |
+
fits_dict = np.load(dict_file)
|
41 |
+
opt_pose = torch.from_numpy(fits_dict['pose'])
|
42 |
+
opt_betas = torch.from_numpy(fits_dict['betas'])
|
43 |
+
opt_valid_fit = torch.from_numpy(fits_dict['valid_fit']).to(
|
44 |
+
torch.uint8)
|
45 |
+
self.fits_dict[ds_name] = torch.cat([opt_pose, opt_betas],
|
46 |
+
dim=1)
|
47 |
+
self.valid_fit_state[ds_name] = opt_valid_fit
|
48 |
+
|
49 |
+
if not options.single_dataset:
|
50 |
+
for ds in train_dataset.datasets:
|
51 |
+
if ds.dataset not in ['h36m']:
|
52 |
+
ds.pose = self.fits_dict[ds.dataset][:, :72].numpy()
|
53 |
+
ds.betas = self.fits_dict[ds.dataset][:, 72:].numpy()
|
54 |
+
ds.has_smpl = self.valid_fit_state[ds.dataset].numpy()
|
55 |
+
|
56 |
+
def save(self):
|
57 |
+
""" Save dictionary state to disk """
|
58 |
+
for ds_name in self.train_dataset.dataset_dict.keys():
|
59 |
+
dict_file = os.path.join(self.options.checkpoint_dir,
|
60 |
+
ds_name + '_fits.npy')
|
61 |
+
np.save(dict_file, self.fits_dict[ds_name].cpu().numpy())
|
62 |
+
|
63 |
+
def __getitem__(self, x):
|
64 |
+
""" Retrieve dictionary entries """
|
65 |
+
dataset_name, ind, rot, is_flipped = x
|
66 |
+
batch_size = len(dataset_name)
|
67 |
+
pose = torch.zeros((batch_size, 72))
|
68 |
+
betas = torch.zeros((batch_size, 10))
|
69 |
+
for ds, i, n in zip(dataset_name, ind, range(batch_size)):
|
70 |
+
params = self.fits_dict[ds][i]
|
71 |
+
pose[n, :] = params[:72]
|
72 |
+
betas[n, :] = params[72:]
|
73 |
+
pose = pose.clone()
|
74 |
+
# Apply flipping and rotation
|
75 |
+
pose = self.flip_pose(self.rotate_pose(pose, rot), is_flipped)
|
76 |
+
betas = betas.clone()
|
77 |
+
return pose, betas
|
78 |
+
|
79 |
+
def get_vaild_state(self, dataset_name, ind):
|
80 |
+
batch_size = len(dataset_name)
|
81 |
+
valid_fit = torch.zeros(batch_size, dtype=torch.uint8)
|
82 |
+
for ds, i, n in zip(dataset_name, ind, range(batch_size)):
|
83 |
+
valid_fit[n] = self.valid_fit_state[ds][i]
|
84 |
+
valid_fit = valid_fit.clone()
|
85 |
+
return valid_fit
|
86 |
+
|
87 |
+
def __setitem__(self, x, val):
|
88 |
+
""" Update dictionary entries """
|
89 |
+
dataset_name, ind, rot, is_flipped, update = x
|
90 |
+
pose, betas = val
|
91 |
+
batch_size = len(dataset_name)
|
92 |
+
# Undo flipping and rotation
|
93 |
+
pose = self.rotate_pose(self.flip_pose(pose, is_flipped), -rot)
|
94 |
+
params = torch.cat((pose, betas), dim=-1).cpu()
|
95 |
+
for ds, i, n in zip(dataset_name, ind, range(batch_size)):
|
96 |
+
if update[n]:
|
97 |
+
self.fits_dict[ds][i] = params[n]
|
98 |
+
|
99 |
+
def flip_pose(self, pose, is_flipped):
|
100 |
+
"""flip SMPL pose parameters"""
|
101 |
+
is_flipped = is_flipped.byte()
|
102 |
+
pose_f = pose.clone()
|
103 |
+
pose_f[is_flipped, :] = pose[is_flipped][:, self.flipped_parts]
|
104 |
+
# we also negate the second and the third dimension of the axis-angle representation
|
105 |
+
pose_f[is_flipped, 1::3] *= -1
|
106 |
+
pose_f[is_flipped, 2::3] *= -1
|
107 |
+
return pose_f
|
108 |
+
|
109 |
+
def rotate_pose(self, pose, rot):
|
110 |
+
"""Rotate SMPL pose parameters by rot degrees"""
|
111 |
+
pose = pose.clone()
|
112 |
+
cos = torch.cos(-np.pi * rot / 180.)
|
113 |
+
sin = torch.sin(-np.pi * rot / 180.)
|
114 |
+
zeros = torch.zeros_like(cos)
|
115 |
+
r3 = torch.zeros(cos.shape[0], 1, 3, device=cos.device)
|
116 |
+
r3[:, 0, -1] = 1
|
117 |
+
R = torch.cat([
|
118 |
+
torch.stack([cos, -sin, zeros], dim=-1).unsqueeze(1),
|
119 |
+
torch.stack([sin, cos, zeros], dim=-1).unsqueeze(1), r3
|
120 |
+
],
|
121 |
+
dim=1)
|
122 |
+
global_pose = pose[:, :3]
|
123 |
+
global_pose_rotmat = angle_axis_to_rotation_matrix(global_pose)
|
124 |
+
global_pose_rotmat_3b3 = global_pose_rotmat[:, :3, :3]
|
125 |
+
global_pose_rotmat_3b3 = torch.matmul(R, global_pose_rotmat_3b3)
|
126 |
+
global_pose_rotmat[:, :3, :3] = global_pose_rotmat_3b3
|
127 |
+
global_pose_rotmat = global_pose_rotmat[:, :-1, :-1].cpu().numpy()
|
128 |
+
global_pose_np = np.zeros((global_pose.shape[0], 3))
|
129 |
+
for i in range(global_pose.shape[0]):
|
130 |
+
aa, _ = cv2.Rodrigues(global_pose_rotmat[i])
|
131 |
+
global_pose_np[i, :] = aa.squeeze()
|
132 |
+
pose[:, :3] = torch.from_numpy(global_pose_np).to(pose.device)
|
133 |
+
return pose
|
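In rotate_pose() the in-plane augmentation angle rot (in degrees) is undone by left-multiplying the global orientation with a rotation about the camera z-axis; written out, with \phi = -\pi \cdot \mathrm{rot} / 180:

    R_z(\phi) = \begin{pmatrix} \cos\phi & -\sin\phi & 0 \\ \sin\phi & \cos\phi & 0 \\ 0 & 0 & 1 \end{pmatrix},
    \qquad R_{\mathrm{global}} \leftarrow R_z(\phi)\, R_{\mathrm{global}}

The rotated matrix is then converted back to axis-angle with cv2.Rodrigues before being written into pose[:, :3].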
lib/pymaf/core/path_config.py
ADDED
@@ -0,0 +1,24 @@
1 |
+
"""
|
2 |
+
This script is borrowed and extended from https://github.com/nkolot/SPIN/blob/master/path_config.py
|
3 |
+
path configuration
|
4 |
+
This file contains definitions of useful data structures and the paths
|
5 |
+
for the datasets and data files necessary to run the code.
|
6 |
+
Things you need to change: the *_ROOT variables that indicate the path to each dataset
|
7 |
+
"""
|
8 |
+
import os
|
9 |
+
from huggingface_hub import hf_hub_url, cached_download
|
10 |
+
|
11 |
+
# pymaf
|
12 |
+
pymaf_data_dir = hf_hub_url('Yuliang/PyMAF', '')
|
13 |
+
smpl_data_dir = hf_hub_url('Yuliang/SMPL', '')
|
14 |
+
SMPL_MODEL_DIR = os.path.join(smpl_data_dir, 'models/smpl')
|
15 |
+
|
16 |
+
SMPL_MEAN_PARAMS = cached_download(os.path.join(pymaf_data_dir, 'smpl_mean_params.npz'), use_auth_token=os.environ['ICON'])
|
17 |
+
MESH_DOWNSAMPLEING = cached_download(os.path.join(pymaf_data_dir, 'mesh_downsampling.npz'), use_auth_token=os.environ['ICON'])
|
18 |
+
CUBE_PARTS_FILE = cached_download(os.path.join(pymaf_data_dir, 'cube_parts.npy'), use_auth_token=os.environ['ICON'])
|
19 |
+
JOINT_REGRESSOR_TRAIN_EXTRA = cached_download(os.path.join(pymaf_data_dir, 'J_regressor_extra.npy'), use_auth_token=os.environ['ICON'])
|
20 |
+
JOINT_REGRESSOR_H36M = cached_download(os.path.join(pymaf_data_dir, 'J_regressor_h36m.npy'), use_auth_token=os.environ['ICON'])
|
21 |
+
VERTEX_TEXTURE_FILE = cached_download(os.path.join(pymaf_data_dir, 'vertex_texture.npy'), use_auth_token=os.environ['ICON'])
|
22 |
+
SMPL_MEAN_PARAMS = cached_download(os.path.join(pymaf_data_dir, 'smpl_mean_params.npz'), use_auth_token=os.environ['ICON'])
|
23 |
+
CHECKPOINT_FILE = cached_download(os.path.join(pymaf_data_dir, 'pretrained_model/PyMAF_model_checkpoint.pt'), use_auth_token=os.environ['ICON'])
|
24 |
+
|
lib/pymaf/models/__init__.py
ADDED
@@ -0,0 +1,3 @@
1 |
+
from .hmr import hmr
|
2 |
+
from .pymaf_net import pymaf_net
|
3 |
+
from .smpl import SMPL
|
lib/pymaf/models/pymaf_net.py
ADDED
@@ -0,0 +1,362 @@
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from lib.pymaf.utils.geometry import rot6d_to_rotmat, projection, rotation_matrix_to_angle_axis
|
6 |
+
from .maf_extractor import MAF_Extractor
|
7 |
+
from .smpl import SMPL, SMPL_MODEL_DIR, SMPL_MEAN_PARAMS, H36M_TO_J14
|
8 |
+
from .hmr import ResNet_Backbone
|
9 |
+
from .res_module import IUV_predict_layer
|
10 |
+
from lib.common.config import cfg
|
11 |
+
import logging
|
12 |
+
|
13 |
+
logger = logging.getLogger(__name__)
|
14 |
+
|
15 |
+
BN_MOMENTUM = 0.1
|
16 |
+
|
17 |
+
|
18 |
+
class Regressor(nn.Module):
|
19 |
+
def __init__(self, feat_dim, smpl_mean_params):
|
20 |
+
super().__init__()
|
21 |
+
|
22 |
+
npose = 24 * 6
|
23 |
+
|
24 |
+
self.fc1 = nn.Linear(feat_dim + npose + 13, 1024)
|
25 |
+
self.drop1 = nn.Dropout()
|
26 |
+
self.fc2 = nn.Linear(1024, 1024)
|
27 |
+
self.drop2 = nn.Dropout()
|
28 |
+
self.decpose = nn.Linear(1024, npose)
|
29 |
+
self.decshape = nn.Linear(1024, 10)
|
30 |
+
self.deccam = nn.Linear(1024, 3)
|
31 |
+
nn.init.xavier_uniform_(self.decpose.weight, gain=0.01)
|
32 |
+
nn.init.xavier_uniform_(self.decshape.weight, gain=0.01)
|
33 |
+
nn.init.xavier_uniform_(self.deccam.weight, gain=0.01)
|
34 |
+
|
35 |
+
self.smpl = SMPL(SMPL_MODEL_DIR, batch_size=64, create_transl=False)
|
36 |
+
|
37 |
+
mean_params = np.load(smpl_mean_params)
|
38 |
+
init_pose = torch.from_numpy(mean_params['pose'][:]).unsqueeze(0)
|
39 |
+
init_shape = torch.from_numpy(
|
40 |
+
mean_params['shape'][:].astype('float32')).unsqueeze(0)
|
41 |
+
init_cam = torch.from_numpy(mean_params['cam']).unsqueeze(0)
|
42 |
+
self.register_buffer('init_pose', init_pose)
|
43 |
+
self.register_buffer('init_shape', init_shape)
|
44 |
+
self.register_buffer('init_cam', init_cam)
|
45 |
+
|
46 |
+
def forward(self,
|
47 |
+
x,
|
48 |
+
init_pose=None,
|
49 |
+
init_shape=None,
|
50 |
+
init_cam=None,
|
51 |
+
n_iter=1,
|
52 |
+
J_regressor=None):
|
53 |
+
batch_size = x.shape[0]
|
54 |
+
|
55 |
+
if init_pose is None:
|
56 |
+
init_pose = self.init_pose.expand(batch_size, -1)
|
57 |
+
if init_shape is None:
|
58 |
+
init_shape = self.init_shape.expand(batch_size, -1)
|
59 |
+
if init_cam is None:
|
60 |
+
init_cam = self.init_cam.expand(batch_size, -1)
|
61 |
+
|
62 |
+
pred_pose = init_pose
|
63 |
+
pred_shape = init_shape
|
64 |
+
pred_cam = init_cam
|
65 |
+
for i in range(n_iter):
|
66 |
+
xc = torch.cat([x, pred_pose, pred_shape, pred_cam], 1)
|
67 |
+
xc = self.fc1(xc)
|
68 |
+
xc = self.drop1(xc)
|
69 |
+
xc = self.fc2(xc)
|
70 |
+
xc = self.drop2(xc)
|
71 |
+
pred_pose = self.decpose(xc) + pred_pose
|
72 |
+
pred_shape = self.decshape(xc) + pred_shape
|
73 |
+
pred_cam = self.deccam(xc) + pred_cam
|
74 |
+
|
75 |
+
pred_rotmat = rot6d_to_rotmat(pred_pose).view(batch_size, 24, 3, 3)
|
76 |
+
|
77 |
+
pred_output = self.smpl(betas=pred_shape,
|
78 |
+
body_pose=pred_rotmat[:, 1:],
|
79 |
+
global_orient=pred_rotmat[:, 0].unsqueeze(1),
|
80 |
+
pose2rot=False)
|
81 |
+
|
82 |
+
pred_vertices = pred_output.vertices
|
83 |
+
pred_joints = pred_output.joints
|
84 |
+
pred_smpl_joints = pred_output.smpl_joints
|
85 |
+
pred_keypoints_2d = projection(pred_joints, pred_cam)
|
86 |
+
pose = rotation_matrix_to_angle_axis(pred_rotmat.reshape(-1, 3,
|
87 |
+
3)).reshape(
|
88 |
+
-1, 72)
|
89 |
+
|
90 |
+
if J_regressor is not None:
|
91 |
+
pred_joints = torch.matmul(J_regressor, pred_vertices)
|
92 |
+
pred_pelvis = pred_joints[:, [0], :].clone()
|
93 |
+
pred_joints = pred_joints[:, H36M_TO_J14, :]
|
94 |
+
pred_joints = pred_joints - pred_pelvis
|
95 |
+
|
96 |
+
output = {
|
97 |
+
'theta': torch.cat([pred_cam, pred_shape, pose], dim=1),
|
98 |
+
'verts': pred_vertices,
|
99 |
+
'kp_2d': pred_keypoints_2d,
|
100 |
+
'kp_3d': pred_joints,
|
101 |
+
'smpl_kp_3d': pred_smpl_joints,
|
102 |
+
'rotmat': pred_rotmat,
|
103 |
+
'pred_cam': pred_cam,
|
104 |
+
'pred_shape': pred_shape,
|
105 |
+
'pred_pose': pred_pose,
|
106 |
+
}
|
107 |
+
return output
|
108 |
+
|
109 |
+
def forward_init(self,
|
110 |
+
x,
|
111 |
+
init_pose=None,
|
112 |
+
init_shape=None,
|
113 |
+
init_cam=None,
|
114 |
+
n_iter=1,
|
115 |
+
J_regressor=None):
|
116 |
+
batch_size = x.shape[0]
|
117 |
+
|
118 |
+
if init_pose is None:
|
119 |
+
init_pose = self.init_pose.expand(batch_size, -1)
|
120 |
+
if init_shape is None:
|
121 |
+
init_shape = self.init_shape.expand(batch_size, -1)
|
122 |
+
if init_cam is None:
|
123 |
+
init_cam = self.init_cam.expand(batch_size, -1)
|
124 |
+
|
125 |
+
pred_pose = init_pose
|
126 |
+
pred_shape = init_shape
|
127 |
+
pred_cam = init_cam
|
128 |
+
|
129 |
+
pred_rotmat = rot6d_to_rotmat(pred_pose.contiguous()).view(
|
130 |
+
batch_size, 24, 3, 3)
|
131 |
+
|
132 |
+
pred_output = self.smpl(betas=pred_shape,
|
133 |
+
body_pose=pred_rotmat[:, 1:],
|
134 |
+
global_orient=pred_rotmat[:, 0].unsqueeze(1),
|
135 |
+
pose2rot=False)
|
136 |
+
|
137 |
+
pred_vertices = pred_output.vertices
|
138 |
+
pred_joints = pred_output.joints
|
139 |
+
pred_smpl_joints = pred_output.smpl_joints
|
140 |
+
pred_keypoints_2d = projection(pred_joints, pred_cam)
|
141 |
+
pose = rotation_matrix_to_angle_axis(pred_rotmat.reshape(-1, 3,
|
142 |
+
3)).reshape(
|
143 |
+
-1, 72)
|
144 |
+
|
145 |
+
if J_regressor is not None:
|
146 |
+
pred_joints = torch.matmul(J_regressor, pred_vertices)
|
147 |
+
pred_pelvis = pred_joints[:, [0], :].clone()
|
148 |
+
pred_joints = pred_joints[:, H36M_TO_J14, :]
|
149 |
+
pred_joints = pred_joints - pred_pelvis
|
150 |
+
|
151 |
+
output = {
|
152 |
+
'theta': torch.cat([pred_cam, pred_shape, pose], dim=1),
|
153 |
+
'verts': pred_vertices,
|
154 |
+
'kp_2d': pred_keypoints_2d,
|
155 |
+
            'kp_3d': pred_joints,
            'smpl_kp_3d': pred_smpl_joints,
            'rotmat': pred_rotmat,
            'pred_cam': pred_cam,
            'pred_shape': pred_shape,
            'pred_pose': pred_pose,
        }
        return output


class PyMAF(nn.Module):
    """ PyMAF based Deep Regressor for Human Mesh Recovery
    PyMAF: 3D Human Pose and Shape Regression with Pyramidal Mesh Alignment Feedback Loop, in ICCV, 2021
    """

    def __init__(self, smpl_mean_params=SMPL_MEAN_PARAMS, pretrained=True):
        super().__init__()
        self.feature_extractor = ResNet_Backbone(
            model=cfg.MODEL.PyMAF.BACKBONE, pretrained=pretrained)

        # deconv layers
        self.inplanes = self.feature_extractor.inplanes
        self.deconv_with_bias = cfg.RES_MODEL.DECONV_WITH_BIAS
        self.deconv_layers = self._make_deconv_layer(
            cfg.RES_MODEL.NUM_DECONV_LAYERS,
            cfg.RES_MODEL.NUM_DECONV_FILTERS,
            cfg.RES_MODEL.NUM_DECONV_KERNELS,
        )

        self.maf_extractor = nn.ModuleList()
        for _ in range(cfg.MODEL.PyMAF.N_ITER):
            self.maf_extractor.append(MAF_Extractor())
        ma_feat_len = self.maf_extractor[-1].Dmap.shape[0] * cfg.MODEL.PyMAF.MLP_DIM[-1]

        grid_size = 21
        xv, yv = torch.meshgrid(
            [torch.linspace(-1, 1, grid_size),
             torch.linspace(-1, 1, grid_size)])
        points_grid = torch.stack([xv.reshape(-1), yv.reshape(-1)]).unsqueeze(0)
        self.register_buffer('points_grid', points_grid)
        grid_feat_len = grid_size * grid_size * cfg.MODEL.PyMAF.MLP_DIM[-1]

        self.regressor = nn.ModuleList()
        for i in range(cfg.MODEL.PyMAF.N_ITER):
            if i == 0:
                ref_infeat_dim = grid_feat_len
            else:
                ref_infeat_dim = ma_feat_len
            self.regressor.append(
                Regressor(feat_dim=ref_infeat_dim, smpl_mean_params=smpl_mean_params))

        dp_feat_dim = 256
        self.with_uv = cfg.LOSS.POINT_REGRESSION_WEIGHTS > 0
        if cfg.MODEL.PyMAF.AUX_SUPV_ON:
            self.dp_head = IUV_predict_layer(feat_dim=dp_feat_dim)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        """
        Deconv_layer used in Simple Baselines:
        Xiao et al. Simple Baselines for Human Pose Estimation and Tracking
        https://github.com/microsoft/human-pose-estimation.pytorch
        """
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'

        def _get_deconv_cfg(deconv_kernel, index):
            if deconv_kernel == 4:
                padding = 1
                output_padding = 0
            elif deconv_kernel == 3:
                padding = 1
                output_padding = 1
            elif deconv_kernel == 2:
                padding = 0
                output_padding = 0

            return deconv_kernel, padding, output_padding

        layers = []
        for i in range(num_layers):
            kernel, padding, output_padding = _get_deconv_cfg(num_kernels[i], i)

            planes = num_filters[i]
            layers.append(
                nn.ConvTranspose2d(in_channels=self.inplanes, out_channels=planes,
                                   kernel_size=kernel, stride=2, padding=padding,
                                   output_padding=output_padding,
                                   bias=self.deconv_with_bias))
            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
            layers.append(nn.ReLU(inplace=True))
            self.inplanes = planes

        return nn.Sequential(*layers)

    def forward(self, x, J_regressor=None):

        batch_size = x.shape[0]

        # spatial features and global features
        s_feat, g_feat = self.feature_extractor(x)

        assert cfg.MODEL.PyMAF.N_ITER >= 0 and cfg.MODEL.PyMAF.N_ITER <= 3
        if cfg.MODEL.PyMAF.N_ITER == 1:
            deconv_blocks = [self.deconv_layers]
        elif cfg.MODEL.PyMAF.N_ITER == 2:
            deconv_blocks = [self.deconv_layers[0:6], self.deconv_layers[6:9]]
        elif cfg.MODEL.PyMAF.N_ITER == 3:
            deconv_blocks = [
                self.deconv_layers[0:3], self.deconv_layers[3:6],
                self.deconv_layers[6:9]
            ]

        out_list = {}

        # initial parameters
        # TODO: remove the initial mesh generation during forward to reduce runtime,
        # by generating the initial mesh beforehand: smpl_output = self.init_smpl
        smpl_output = self.regressor[0].forward_init(g_feat, J_regressor=J_regressor)

        out_list['smpl_out'] = [smpl_output]
        out_list['dp_out'] = []

        # for visualization
        vis_feat_list = [s_feat.detach()]

        # parameter predictions
        for rf_i in range(cfg.MODEL.PyMAF.N_ITER):
            pred_cam = smpl_output['pred_cam']
            pred_shape = smpl_output['pred_shape']
            pred_pose = smpl_output['pred_pose']

            pred_cam = pred_cam.detach()
            pred_shape = pred_shape.detach()
            pred_pose = pred_pose.detach()

            s_feat_i = deconv_blocks[rf_i](s_feat)
            s_feat = s_feat_i
            vis_feat_list.append(s_feat_i.detach())

            self.maf_extractor[rf_i].im_feat = s_feat_i
            self.maf_extractor[rf_i].cam = pred_cam

            if rf_i == 0:
                sample_points = torch.transpose(
                    self.points_grid.expand(batch_size, -1, -1), 1, 2)
                ref_feature = self.maf_extractor[rf_i].sampling(sample_points)
            else:
                pred_smpl_verts = smpl_output['verts'].detach()
                # TODO: use a more sparse SMPL implementation (with 431 vertices) for acceleration
                pred_smpl_verts_ds = torch.matmul(
                    self.maf_extractor[rf_i].Dmap.unsqueeze(0),
                    pred_smpl_verts)  # [B, 431, 3]
                ref_feature = self.maf_extractor[rf_i](pred_smpl_verts_ds)  # [B, 431 * n_feat]

            smpl_output = self.regressor[rf_i](ref_feature, pred_pose, pred_shape,
                                               pred_cam, n_iter=1, J_regressor=J_regressor)
            out_list['smpl_out'].append(smpl_output)

        if self.training and cfg.MODEL.PyMAF.AUX_SUPV_ON:
            iuv_out_dict = self.dp_head(s_feat)
            out_list['dp_out'].append(iuv_out_dict)

        return out_list


def pymaf_net(smpl_mean_params, pretrained=True):
    """ Constructs a PyMAF model with ResNet50 backbone.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = PyMAF(smpl_mean_params, pretrained)
    return model
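A minimal usage sketch of the regressor above (an editorial illustration, not part of the commit): it assumes the PyMAF config and the SMPL/mean-parameter assets referenced by the imports are already in place, and that the input is a 224x224 crop as elsewhere in the repo; the 'smpl_out' list and 'verts' key are the ones produced by the forward pass shown above.

# Hypothetical usage sketch -- assumes cfg and the SMPL assets are set up.
import torch

model = pymaf_net(SMPL_MEAN_PARAMS, pretrained=True).eval()
crop = torch.randn(1, 3, 224, 224)   # one normalized RGB crop
with torch.no_grad():
    out = model(crop)
last = out['smpl_out'][-1]           # prediction after the final alignment-feedback iteration
verts = last['verts']                # SMPL vertices, [1, 6890, 3]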
lib/pymaf/models/smpl.py
ADDED
@@ -0,0 +1,92 @@
# This script is borrowed from https://github.com/nkolot/SPIN/blob/master/models/smpl.py

import torch
import numpy as np
from lib.smplx import SMPL as _SMPL
from lib.smplx.body_models import ModelOutput
from lib.smplx.lbs import vertices2joints
from collections import namedtuple

from lib.pymaf.core import path_config, constants

SMPL_MEAN_PARAMS = path_config.SMPL_MEAN_PARAMS
SMPL_MODEL_DIR = path_config.SMPL_MODEL_DIR

# Indices to get the 14 LSP joints from the 17 H36M joints
H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9]
H36M_TO_J14 = H36M_TO_J17[:14]


class SMPL(_SMPL):
    """ Extension of the official SMPL implementation to support more joints """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        joints = [constants.JOINT_MAP[i] for i in constants.JOINT_NAMES]
        J_regressor_extra = np.load(path_config.JOINT_REGRESSOR_TRAIN_EXTRA)
        self.register_buffer(
            'J_regressor_extra',
            torch.tensor(J_regressor_extra, dtype=torch.float32))
        self.joint_map = torch.tensor(joints, dtype=torch.long)
        self.ModelOutput = namedtuple(
            'ModelOutput_', ModelOutput._fields + (
                'smpl_joints',
                'joints_J19',
            ))
        self.ModelOutput.__new__.__defaults__ = (None, ) * len(
            self.ModelOutput._fields)

    def forward(self, *args, **kwargs):
        kwargs['get_skin'] = True
        smpl_output = super().forward(*args, **kwargs)
        extra_joints = vertices2joints(self.J_regressor_extra,
                                       smpl_output.vertices)
        # smpl_output.joints: [B, 45, 3]  extra_joints: [B, 9, 3]
        vertices = smpl_output.vertices
        joints = torch.cat([smpl_output.joints, extra_joints], dim=1)
        smpl_joints = smpl_output.joints[:, :24]
        joints = joints[:, self.joint_map, :]  # [B, 49, 3]
        joints_J24 = joints[:, -24:, :]
        joints_J19 = joints_J24[:, constants.J24_TO_J19, :]
        output = self.ModelOutput(vertices=vertices,
                                  global_orient=smpl_output.global_orient,
                                  body_pose=smpl_output.body_pose,
                                  joints=joints,
                                  joints_J19=joints_J19,
                                  smpl_joints=smpl_joints,
                                  betas=smpl_output.betas,
                                  full_pose=smpl_output.full_pose)
        return output


def get_smpl_faces():
    smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    return smpl.faces


def get_part_joints(smpl_joints):
    batch_size = smpl_joints.shape[0]

    # part_joints = torch.zeros().to(smpl_joints.device)

    one_seg_pairs = [(0, 1), (0, 2), (0, 3), (3, 6), (9, 12), (9, 13), (9, 14),
                     (12, 15), (13, 16), (14, 17)]
    two_seg_pairs = [(1, 4), (2, 5), (4, 7), (5, 8), (16, 18), (17, 19),
                     (18, 20), (19, 21)]

    one_seg_pairs.extend(two_seg_pairs)

    single_joints = [(10), (11), (15), (22), (23)]

    part_joints = []

    for j_p in one_seg_pairs:
        new_joint = torch.mean(smpl_joints[:, j_p], dim=1, keepdim=True)
        part_joints.append(new_joint)

    for j_p in single_joints:
        part_joints.append(smpl_joints[:, j_p:j_p + 1])

    part_joints = torch.cat(part_joints, dim=1)

    return part_joints
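A tiny self-contained illustration of the joint remapping tables defined above (editorial sketch; no model files needed, the index list is copied from the file):

# Mapping a dummy batch of 17 H36M joints down to the 14 LSP joints.
import torch

H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9]
H36M_TO_J14 = H36M_TO_J17[:14]

h36m = torch.randn(2, 17, 3)        # [B, 17, 3] H36M joints
lsp14 = h36m[:, H36M_TO_J14, :]     # reordered/truncated to [B, 14, 3]
print(lsp14.shape)                  # torch.Size([2, 14, 3])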
lib/pymaf/models/hmr.py
ADDED
@@ -0,0 +1,303 @@
# This script is borrowed from https://github.com/nkolot/SPIN/blob/master/models/hmr.py

import torch
import torch.nn as nn
import torchvision.models.resnet as resnet
import numpy as np
import math
from lib.pymaf.utils.geometry import rot6d_to_rotmat

import logging

logger = logging.getLogger(__name__)

BN_MOMENTUM = 0.1


class Bottleneck(nn.Module):
    """ Redefinition of Bottleneck residual block
        Adapted from the official PyTorch implementation
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet_Backbone(nn.Module):
    """ Feature Extractor with ResNet backbone """

    def __init__(self, model='res50', pretrained=True):
        if model == 'res50':
            block, layers = Bottleneck, [3, 4, 6, 3]
        else:
            pass  # TODO

        self.inplanes = 64
        super().__init__()
        npose = 24 * 6
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)

        if pretrained:
            resnet_imagenet = resnet.resnet50(pretrained=True)
            self.load_state_dict(resnet_imagenet.state_dict(), strict=False)
            logger.info('loaded resnet50 imagenet pretrained model')

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'

        def _get_deconv_cfg(deconv_kernel, index):
            if deconv_kernel == 4:
                padding = 1
                output_padding = 0
            elif deconv_kernel == 3:
                padding = 1
                output_padding = 1
            elif deconv_kernel == 2:
                padding = 0
                output_padding = 0

            return deconv_kernel, padding, output_padding

        layers = []
        for i in range(num_layers):
            kernel, padding, output_padding = _get_deconv_cfg(num_kernels[i], i)

            planes = num_filters[i]
            layers.append(
                nn.ConvTranspose2d(in_channels=self.inplanes, out_channels=planes,
                                   kernel_size=kernel, stride=2, padding=padding,
                                   output_padding=output_padding,
                                   bias=self.deconv_with_bias))
            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
            layers.append(nn.ReLU(inplace=True))
            self.inplanes = planes

        return nn.Sequential(*layers)

    def forward(self, x):

        batch_size = x.shape[0]

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        xf = self.avgpool(x4)
        xf = xf.view(xf.size(0), -1)

        x_featmap = x4

        return x_featmap, xf


class HMR(nn.Module):
    """ SMPL Iterative Regressor with ResNet50 backbone """

    def __init__(self, block, layers, smpl_mean_params):
        self.inplanes = 64
        super().__init__()
        npose = 24 * 6
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc1 = nn.Linear(512 * block.expansion + npose + 13, 1024)
        self.drop1 = nn.Dropout()
        self.fc2 = nn.Linear(1024, 1024)
        self.drop2 = nn.Dropout()
        self.decpose = nn.Linear(1024, npose)
        self.decshape = nn.Linear(1024, 10)
        self.deccam = nn.Linear(1024, 3)
        nn.init.xavier_uniform_(self.decpose.weight, gain=0.01)
        nn.init.xavier_uniform_(self.decshape.weight, gain=0.01)
        nn.init.xavier_uniform_(self.deccam.weight, gain=0.01)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        mean_params = np.load(smpl_mean_params)
        init_pose = torch.from_numpy(mean_params['pose'][:]).unsqueeze(0)
        init_shape = torch.from_numpy(
            mean_params['shape'][:].astype('float32')).unsqueeze(0)
        init_cam = torch.from_numpy(mean_params['cam']).unsqueeze(0)
        self.register_buffer('init_pose', init_pose)
        self.register_buffer('init_shape', init_shape)
        self.register_buffer('init_cam', init_cam)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3):

        batch_size = x.shape[0]

        if init_pose is None:
            init_pose = self.init_pose.expand(batch_size, -1)
        if init_shape is None:
            init_shape = self.init_shape.expand(batch_size, -1)
        if init_cam is None:
            init_cam = self.init_cam.expand(batch_size, -1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        xf = self.avgpool(x4)
        xf = xf.view(xf.size(0), -1)

        pred_pose = init_pose
        pred_shape = init_shape
        pred_cam = init_cam
        for i in range(n_iter):
            xc = torch.cat([xf, pred_pose, pred_shape, pred_cam], 1)
            xc = self.fc1(xc)
            xc = self.drop1(xc)
            xc = self.fc2(xc)
            xc = self.drop2(xc)
            pred_pose = self.decpose(xc) + pred_pose
            pred_shape = self.decshape(xc) + pred_shape
            pred_cam = self.deccam(xc) + pred_cam

        pred_rotmat = rot6d_to_rotmat(pred_pose).view(batch_size, 24, 3, 3)

        return pred_rotmat, pred_shape, pred_cam


def hmr(smpl_mean_params, pretrained=True, **kwargs):
    """ Constructs an HMR model with ResNet50 backbone.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = HMR(Bottleneck, [3, 4, 6, 3], smpl_mean_params, **kwargs)
    if pretrained:
        resnet_imagenet = resnet.resnet50(pretrained=True)
        model.load_state_dict(resnet_imagenet.state_dict(), strict=False)
    return model
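A short editorial sketch of what the backbone returns (not part of the commit; the module path follows the file header above, and pretrained=False is used so no torchvision download is triggered):

import torch
from lib.pymaf.models.hmr import ResNet_Backbone

backbone = ResNet_Backbone(model='res50', pretrained=False)
s_feat, g_feat = backbone(torch.randn(1, 3, 224, 224))
print(s_feat.shape)   # torch.Size([1, 2048, 7, 7])  spatial map fed to the deconv layers
print(g_feat.shape)   # torch.Size([1, 2048])        global vector used for the initial estimate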
lib/pymaf/models/maf_extractor.py
ADDED
@@ -0,0 +1,135 @@
# This script is borrowed and extended from https://github.com/shunsukesaito/PIFu/blob/master/lib/model/SurfaceClassifier.py

from packaging import version
import torch
import scipy
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from lib.common.config import cfg
from lib.pymaf.utils.geometry import projection
from lib.pymaf.core.path_config import MESH_DOWNSAMPLEING

import logging

logger = logging.getLogger(__name__)


class MAF_Extractor(nn.Module):
    ''' Mesh-aligned Feature Extractor
    As discussed in the paper, we extract mesh-aligned features based on the 2D projection of the mesh vertices.
    The features extracted from spatial feature maps go through an MLP for dimension reduction.
    '''

    def __init__(self, device=torch.device('cuda')):
        super().__init__()

        self.device = device
        self.filters = []
        self.num_views = 1
        filter_channels = cfg.MODEL.PyMAF.MLP_DIM
        self.last_op = nn.ReLU(True)

        for l in range(0, len(filter_channels) - 1):
            if 0 != l:
                self.filters.append(
                    nn.Conv1d(filter_channels[l] + filter_channels[0],
                              filter_channels[l + 1], 1))
            else:
                self.filters.append(
                    nn.Conv1d(filter_channels[l], filter_channels[l + 1], 1))

            self.add_module("conv%d" % l, self.filters[l])

        self.im_feat = None
        self.cam = None

        # downsample SMPL mesh and assign part labels
        # from https://github.com/nkolot/GraphCMR/blob/master/data/mesh_downsampling.npz
        smpl_mesh_graph = np.load(MESH_DOWNSAMPLEING,
                                  allow_pickle=True,
                                  encoding='latin1')

        A = smpl_mesh_graph['A']
        U = smpl_mesh_graph['U']
        D = smpl_mesh_graph['D']  # shape: (2,)

        # downsampling
        ptD = []
        for i in range(len(D)):
            d = scipy.sparse.coo_matrix(D[i])
            i = torch.LongTensor(np.array([d.row, d.col]))
            v = torch.FloatTensor(d.data)
            ptD.append(torch.sparse.FloatTensor(i, v, d.shape))

        # downsampling mapping from 6890 points to 431 points
        # ptD[0].to_dense() - Size: [1723, 6890]
        # ptD[1].to_dense() - Size: [431, 1723]
        Dmap = torch.matmul(ptD[1].to_dense(), ptD[0].to_dense())  # 6890 -> 431
        self.register_buffer('Dmap', Dmap)

    def reduce_dim(self, feature):
        '''
        Dimension reduction by multi-layer perceptrons
        :param feature: list of [B, C_s, N] point-wise features before dimension reduction
        :return: [B, C_p x N] concatenation of point-wise features after dimension reduction
        '''
        y = feature
        tmpy = feature
        for i, f in enumerate(self.filters):
            y = self._modules['conv' +
                              str(i)](y if i == 0 else torch.cat([y, tmpy], 1))
            if i != len(self.filters) - 1:
                y = F.leaky_relu(y)
            if self.num_views > 1 and i == len(self.filters) // 2:
                y = y.view(-1, self.num_views, y.shape[1], y.shape[2]).mean(dim=1)
                tmpy = feature.view(-1, self.num_views, feature.shape[1],
                                    feature.shape[2]).mean(dim=1)

        y = self.last_op(y)

        y = y.view(y.shape[0], -1)
        return y

    def sampling(self, points, im_feat=None, z_feat=None):
        '''
        Given 2D points, sample the point-wise features for each point;
        the dimension of point-wise features is reduced from C_s to C_p by the MLP.
        Image features should be pre-computed before this call.
        :param points: [B, N, 2] image coordinates of points
        :im_feat: [B, C_s, H_s, W_s] spatial feature maps
        :return: [B, C_p x N] concatenation of point-wise features after dimension reduction
        '''
        if im_feat is None:
            im_feat = self.im_feat

        batch_size = im_feat.shape[0]

        if version.parse(torch.__version__) >= version.parse('1.3.0'):
            # Default grid_sample behavior has changed to align_corners=False since 1.3.0.
            point_feat = torch.nn.functional.grid_sample(
                im_feat, points.unsqueeze(2), align_corners=True)[..., 0]
        else:
            point_feat = torch.nn.functional.grid_sample(
                im_feat, points.unsqueeze(2))[..., 0]

        mesh_align_feat = self.reduce_dim(point_feat)
        return mesh_align_feat

    def forward(self, p, s_feat=None, cam=None, **kwargs):
        ''' Returns mesh-aligned features for the 3D mesh points.
        Args:
            p (tensor): [B, N_m, 3] mesh vertices
            s_feat (tensor): [B, C_s, H_s, W_s] spatial feature maps
            cam (tensor): [B, 3] camera
        Return:
            mesh_align_feat (tensor): [B, C_p x N_m] mesh-aligned features
        '''
        if cam is None:
            cam = self.cam
        p_proj_2d = projection(p, cam, retain_z=False)
        mesh_align_feat = self.sampling(p_proj_2d, s_feat)
        return mesh_align_feat
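A stand-alone editorial illustration of the sampling step above (constructing MAF_Extractor itself needs cfg and the mesh_downsampling.npz asset, so only the grid_sample core is shown; all tensors are dummies):

import torch
import torch.nn.functional as F

im_feat = torch.randn(1, 256, 56, 56)    # a spatial feature map [B, C_s, H_s, W_s]
points = torch.rand(1, 431, 2) * 2 - 1   # projected vertices in normalized [-1, 1] coords
point_feat = F.grid_sample(im_feat, points.unsqueeze(2), align_corners=True)[..., 0]
print(point_feat.shape)                  # torch.Size([1, 256, 431]) per-point features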
lib/pymaf/models/res_module.py
ADDED
@@ -0,0 +1,385 @@
# code brought in part from https://github.com/microsoft/human-pose-estimation.pytorch/blob/master/lib/models/pose_resnet.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
import os
from lib.pymaf.core.cfgs import cfg

import logging

logger = logging.getLogger(__name__)

BN_MOMENTUM = 0.1


def conv3x3(in_planes, out_planes, stride=1, bias=False, groups=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes * groups, out_planes * groups, kernel_size=3,
                     stride=stride, padding=1, bias=bias, groups=groups)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride, groups=groups)
        self.bn1 = nn.BatchNorm2d(planes * groups, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes, groups=groups)
        self.bn2 = nn.BatchNorm2d(planes * groups, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes * groups, planes * groups,
                               kernel_size=1, bias=False, groups=groups)
        self.bn1 = nn.BatchNorm2d(planes * groups, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes * groups, planes * groups, kernel_size=3,
                               stride=stride, padding=1, bias=False, groups=groups)
        self.bn2 = nn.BatchNorm2d(planes * groups, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes * groups, planes * self.expansion * groups,
                               kernel_size=1, bias=False, groups=groups)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion * groups,
                                  momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


resnet_spec = {
    18: (BasicBlock, [2, 2, 2, 2]),
    34: (BasicBlock, [3, 4, 6, 3]),
    50: (Bottleneck, [3, 4, 6, 3]),
    101: (Bottleneck, [3, 4, 23, 3]),
    152: (Bottleneck, [3, 8, 36, 3])
}


class IUV_predict_layer(nn.Module):
    def __init__(self, feat_dim=256, final_cov_k=3, part_out_dim=25, with_uv=True):
        super().__init__()

        self.with_uv = with_uv
        if self.with_uv:
            self.predict_u = nn.Conv2d(in_channels=feat_dim, out_channels=25,
                                       kernel_size=final_cov_k, stride=1,
                                       padding=1 if final_cov_k == 3 else 0)

            self.predict_v = nn.Conv2d(in_channels=feat_dim, out_channels=25,
                                       kernel_size=final_cov_k, stride=1,
                                       padding=1 if final_cov_k == 3 else 0)

        self.predict_ann_index = nn.Conv2d(in_channels=feat_dim, out_channels=15,
                                           kernel_size=final_cov_k, stride=1,
                                           padding=1 if final_cov_k == 3 else 0)

        self.predict_uv_index = nn.Conv2d(in_channels=feat_dim, out_channels=25,
                                          kernel_size=final_cov_k, stride=1,
                                          padding=1 if final_cov_k == 3 else 0)

        self.inplanes = feat_dim

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        return_dict = {}

        predict_uv_index = self.predict_uv_index(x)
        predict_ann_index = self.predict_ann_index(x)

        return_dict['predict_uv_index'] = predict_uv_index
        return_dict['predict_ann_index'] = predict_ann_index

        if self.with_uv:
            predict_u = self.predict_u(x)
            predict_v = self.predict_v(x)
            return_dict['predict_u'] = predict_u
            return_dict['predict_v'] = predict_v
        else:
            return_dict['predict_u'] = None
            return_dict['predict_v'] = None
            # return_dict['predict_u'] = torch.zeros(predict_uv_index.shape).to(predict_uv_index.device)
            # return_dict['predict_v'] = torch.zeros(predict_uv_index.shape).to(predict_uv_index.device)

        return return_dict


class SmplResNet(nn.Module):
    def __init__(self, resnet_nums, in_channels=3, num_classes=229, last_stride=2,
                 n_extra_feat=0, truncate=0, **kwargs):
        super().__init__()

        self.inplanes = 64
        self.truncate = truncate
        # extra = cfg.MODEL.EXTRA
        # self.deconv_with_bias = extra.DECONV_WITH_BIAS
        block, layers = resnet_spec[resnet_nums]

        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2],
                                       stride=2) if truncate < 2 else None
        self.layer4 = self._make_layer(block, 512, layers[3],
                                       stride=last_stride) if truncate < 1 else None

        self.avg_pooling = nn.AdaptiveAvgPool2d(1)

        self.num_classes = num_classes
        if num_classes > 0:
            self.final_layer = nn.Linear(512 * block.expansion, num_classes)
            nn.init.xavier_uniform_(self.final_layer.weight, gain=0.01)

        self.n_extra_feat = n_extra_feat
        if n_extra_feat > 0:
            self.trans_conv = nn.Sequential(
                nn.Conv2d(n_extra_feat + 512 * block.expansion,
                          512 * block.expansion, kernel_size=1, bias=False),
                nn.BatchNorm2d(512 * block.expansion, momentum=BN_MOMENTUM),
                nn.ReLU(True))

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x, infeat=None):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2) if self.truncate < 2 else x2
        x4 = self.layer4(x3) if self.truncate < 1 else x3

        if infeat is not None:
            x4 = self.trans_conv(torch.cat([infeat, x4], 1))

        if self.num_classes > 0:
            xp = self.avg_pooling(x4)
            cls = self.final_layer(xp.view(xp.size(0), -1))
            if not cfg.DANET.USE_MEAN_PARA:
                # for non-negative scale
                scale = F.relu(cls[:, 0]).unsqueeze(1)
                cls = torch.cat((scale, cls[:, 1:]), dim=1)
        else:
            cls = None

        return cls, {'x4': x4}

    def init_weights(self, pretrained=''):
        if os.path.isfile(pretrained):
            logger.info('=> loading pretrained model {}'.format(pretrained))
            # self.load_state_dict(pretrained_state_dict, strict=False)
            checkpoint = torch.load(pretrained)
            if isinstance(checkpoint, OrderedDict):
                # state_dict = checkpoint
                state_dict_old = self.state_dict()
                for key in state_dict_old.keys():
                    if key in checkpoint.keys():
                        if state_dict_old[key].shape != checkpoint[key].shape:
                            del checkpoint[key]
                state_dict = checkpoint
            elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
                state_dict_old = checkpoint['state_dict']
                state_dict = OrderedDict()
                # delete 'module.' because it is saved from DataParallel module
                for key in state_dict_old.keys():
                    if key.startswith('module.'):
                        # state_dict[key[7:]] = state_dict[key]
                        # state_dict.pop(key)
                        state_dict[key[7:]] = state_dict_old[key]
                    else:
                        state_dict[key] = state_dict_old[key]
            else:
                raise RuntimeError(
                    'No state_dict found in checkpoint file {}'.format(pretrained))
            self.load_state_dict(state_dict, strict=False)
        else:
            logger.error('=> imagenet pretrained model does not exist')
            logger.error('=> please download it first')
            raise ValueError('imagenet pretrained model does not exist')


class LimbResLayers(nn.Module):
    def __init__(self, resnet_nums, inplanes, outplanes=None, groups=1, **kwargs):
        super().__init__()

        self.inplanes = inplanes
        block, layers = resnet_spec[resnet_nums]
        self.outplanes = 512 if outplanes is None else outplanes
        self.layer4 = self._make_layer(block, self.outplanes, layers[3],
                                       stride=2, groups=groups)

        self.avg_pooling = nn.AdaptiveAvgPool2d(1)

    def _make_layer(self, block, planes, blocks, stride=1, groups=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes * groups, planes * block.expansion * groups,
                          kernel_size=1, stride=stride, bias=False, groups=groups),
                nn.BatchNorm2d(planes * block.expansion * groups,
                               momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(
            block(self.inplanes, planes, stride, downsample, groups=groups))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=groups))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.layer4(x)
        x = self.avg_pooling(x)

        return x
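A small editorial sketch of the auxiliary IUV head's outputs (assumes the repo's cfg module can be imported; all tensors are dummies and the 56x56 feature-map size is illustrative):

import torch
from lib.pymaf.models.res_module import IUV_predict_layer

head = IUV_predict_layer(feat_dim=256)
maps = head(torch.randn(1, 256, 56, 56))
print(maps['predict_uv_index'].shape)    # torch.Size([1, 25, 56, 56]) UV part logits
print(maps['predict_ann_index'].shape)   # torch.Size([1, 15, 56, 56]) annotation part logits
print(maps['predict_u'].shape)           # torch.Size([1, 25, 56, 56]) U map (with_uv=True)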
lib/pymaf/utils/__init__.py
ADDED
File without changes
lib/pymaf/utils/geometry.py
ADDED
@@ -0,0 +1,435 @@
import torch
|
2 |
+
import numpy as np
|
3 |
+
from torch.nn import functional as F
|
4 |
+
"""
|
5 |
+
Useful geometric operations, e.g. Perspective projection and a differentiable Rodrigues formula
|
6 |
+
Parts of the code are taken from https://github.com/MandyMo/pytorch_HMR
|
7 |
+
"""
|
8 |
+
|
9 |
+
|
10 |
+
def batch_rodrigues(theta):
|
11 |
+
"""Convert axis-angle representation to rotation matrix.
|
12 |
+
Args:
|
13 |
+
theta: size = [B, 3]
|
14 |
+
Returns:
|
15 |
+
Rotation matrix corresponding to the quaternion -- size = [B, 3, 3]
|
16 |
+
"""
|
17 |
+
l1norm = torch.norm(theta + 1e-8, p=2, dim=1)
|
18 |
+
angle = torch.unsqueeze(l1norm, -1)
|
19 |
+
normalized = torch.div(theta, angle)
|
20 |
+
angle = angle * 0.5
|
21 |
+
v_cos = torch.cos(angle)
|
22 |
+
v_sin = torch.sin(angle)
|
23 |
+
quat = torch.cat([v_cos, v_sin * normalized], dim=1)
|
24 |
+
return quat_to_rotmat(quat)
|
25 |
+
|
26 |
+
|
27 |
+
def quat_to_rotmat(quat):
|
28 |
+
"""Convert quaternion coefficients to rotation matrix.
|
29 |
+
Args:
|
30 |
+
quat: size = [B, 4] 4 <===>(w, x, y, z)
|
31 |
+
Returns:
|
32 |
+
Rotation matrix corresponding to the quaternion -- size = [B, 3, 3]
|
33 |
+
"""
|
34 |
+
norm_quat = quat
|
35 |
+
norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True)
|
36 |
+
w, x, y, z = norm_quat[:, 0], norm_quat[:, 1], norm_quat[:,
|
37 |
+
2], norm_quat[:,
|
38 |
+
3]
|
39 |
+
|
40 |
+
B = quat.size(0)
|
41 |
+
|
42 |
+
w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
|
43 |
+
wx, wy, wz = w * x, w * y, w * z
|
44 |
+
xy, xz, yz = x * y, x * z, y * z
|
45 |
+
|
46 |
+
rotMat = torch.stack([
|
47 |
+
w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz, 2 * wz + 2 * xy,
|
48 |
+
w2 - x2 + y2 - z2, 2 * yz - 2 * wx, 2 * xz - 2 * wy, 2 * wx + 2 * yz,
|
49 |
+
w2 - x2 - y2 + z2
|
50 |
+
],
|
51 |
+
dim=1).view(B, 3, 3)
|
52 |
+
return rotMat
|
53 |
+
|
54 |
+
|
55 |
+
def rotation_matrix_to_angle_axis(rotation_matrix):
|
56 |
+
"""
|
57 |
+
This function is borrowed from https://github.com/kornia/kornia
|
58 |
+
Convert 3x4 rotation matrix to Rodrigues vector
|
59 |
+
Args:
|
60 |
+
rotation_matrix (Tensor): rotation matrix.
|
61 |
+
Returns:
|
62 |
+
Tensor: Rodrigues vector transformation.
|
63 |
+
Shape:
|
64 |
+
- Input: :math:`(N, 3, 4)`
|
65 |
+
- Output: :math:`(N, 3)`
|
66 |
+
Example:
|
67 |
+
>>> input = torch.rand(2, 3, 4) # Nx4x4
|
68 |
+
>>> output = tgm.rotation_matrix_to_angle_axis(input) # Nx3
|
69 |
+
"""
|
70 |
+
if rotation_matrix.shape[1:] == (3, 3):
|
71 |
+
rot_mat = rotation_matrix.reshape(-1, 3, 3)
|
72 |
+
hom = torch.tensor([0, 0, 1],
|
73 |
+
dtype=torch.float32,
|
74 |
+
device=rotation_matrix.device).reshape(
|
75 |
+
1, 3, 1).expand(rot_mat.shape[0], -1, -1)
|
76 |
+
rotation_matrix = torch.cat([rot_mat, hom], dim=-1)
|
77 |
+
|
78 |
+
quaternion = rotation_matrix_to_quaternion(rotation_matrix)
|
79 |
+
aa = quaternion_to_angle_axis(quaternion)
|
80 |
+
aa[torch.isnan(aa)] = 0.0
|
81 |
+
return aa
|
82 |
+
|
83 |
+
|
84 |
+
def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor:
|
85 |
+
"""
|
86 |
+
This function is borrowed from https://github.com/kornia/kornia
|
87 |
+
Convert quaternion vector to angle axis of rotation.
|
88 |
+
Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h
|
89 |
+
Args:
|
90 |
+
quaternion (torch.Tensor): tensor with quaternions.
|
91 |
+
Return:
|
92 |
+
torch.Tensor: tensor with angle axis of rotation.
|
93 |
+
Shape:
|
94 |
+
- Input: :math:`(*, 4)` where `*` means, any number of dimensions
|
95 |
+
- Output: :math:`(*, 3)`
|
96 |
+
Example:
|
97 |
+
>>> quaternion = torch.rand(2, 4) # Nx4
|
98 |
+
>>> angle_axis = tgm.quaternion_to_angle_axis(quaternion) # Nx3
|
99 |
+
"""
|
100 |
+
if not torch.is_tensor(quaternion):
|
101 |
+
raise TypeError("Input type is not a torch.Tensor. Got {}".format(
|
102 |
+
type(quaternion)))
|
103 |
+
|
104 |
+
if not quaternion.shape[-1] == 4:
|
105 |
+
raise ValueError(
|
106 |
+
"Input must be a tensor of shape Nx4 or 4. Got {}".format(
|
107 |
+
quaternion.shape))
|
108 |
+
# unpack input and compute conversion
|
109 |
+
q1: torch.Tensor = quaternion[..., 1]
|
110 |
+
q2: torch.Tensor = quaternion[..., 2]
|
111 |
+
q3: torch.Tensor = quaternion[..., 3]
|
112 |
+
sin_squared_theta: torch.Tensor = q1 * q1 + q2 * q2 + q3 * q3
|
113 |
+
|
114 |
+
sin_theta: torch.Tensor = torch.sqrt(sin_squared_theta)
|
115 |
+
cos_theta: torch.Tensor = quaternion[..., 0]
|
116 |
+
two_theta: torch.Tensor = 2.0 * torch.where(
|
117 |
+
cos_theta < 0.0, torch.atan2(-sin_theta, -cos_theta),
|
118 |
+
torch.atan2(sin_theta, cos_theta))
|
119 |
+
|
120 |
+
k_pos: torch.Tensor = two_theta / sin_theta
|
121 |
+
k_neg: torch.Tensor = 2.0 * torch.ones_like(sin_theta)
|
122 |
+
k: torch.Tensor = torch.where(sin_squared_theta > 0.0, k_pos, k_neg)
|
123 |
+
|
124 |
+
angle_axis: torch.Tensor = torch.zeros_like(quaternion)[..., :3]
|
125 |
+
angle_axis[..., 0] += q1 * k
|
126 |
+
angle_axis[..., 1] += q2 * k
|
127 |
+
angle_axis[..., 2] += q3 * k
|
128 |
+
return angle_axis
|
129 |
+
|
130 |
+
|
131 |
+
def rotation_matrix_to_quaternion(rotation_matrix, eps=1e-6):
|
132 |
+
"""
|
133 |
+
This function is borrowed from https://github.com/kornia/kornia
|
134 |
+
Convert 3x4 rotation matrix to 4d quaternion vector
|
135 |
+
This algorithm is based on algorithm described in
|
136 |
+
https://github.com/KieranWynn/pyquaternion/blob/master/pyquaternion/quaternion.py#L201
|
137 |
+
Args:
|
138 |
+
rotation_matrix (Tensor): the rotation matrix to convert.
|
139 |
+
Return:
|
140 |
+
Tensor: the rotation in quaternion
|
141 |
+
Shape:
|
142 |
+
- Input: :math:`(N, 3, 4)`
|
143 |
+
- Output: :math:`(N, 4)`
|
144 |
+
Example:
|
145 |
+
>>> input = torch.rand(4, 3, 4) # Nx3x4
|
146 |
+
>>> output = tgm.rotation_matrix_to_quaternion(input) # Nx4
|
147 |
+
"""
|
148 |
+
if not torch.is_tensor(rotation_matrix):
|
149 |
+
raise TypeError("Input type is not a torch.Tensor. Got {}".format(
|
150 |
+
type(rotation_matrix)))
|
151 |
+
|
152 |
+
if len(rotation_matrix.shape) > 3:
|
153 |
+
raise ValueError(
|
154 |
+
"Input size must be a three dimensional tensor. Got {}".format(
|
155 |
+
rotation_matrix.shape))
|
156 |
+
if not rotation_matrix.shape[-2:] == (3, 4):
|
157 |
+
raise ValueError(
|
158 |
+
"Input size must be a N x 3 x 4 tensor. Got {}".format(
|
159 |
+
rotation_matrix.shape))
|
160 |
+
|
161 |
+
rmat_t = torch.transpose(rotation_matrix, 1, 2)
|
162 |
+
|
163 |
+
mask_d2 = rmat_t[:, 2, 2] < eps
|
164 |
+
|
165 |
+
mask_d0_d1 = rmat_t[:, 0, 0] > rmat_t[:, 1, 1]
|
166 |
+
mask_d0_nd1 = rmat_t[:, 0, 0] < -rmat_t[:, 1, 1]
|
167 |
+
|
168 |
+
t0 = 1 + rmat_t[:, 0, 0] - rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
|
169 |
+
q0 = torch.stack([
|
170 |
+
rmat_t[:, 1, 2] - rmat_t[:, 2, 1], t0,
|
171 |
+
rmat_t[:, 0, 1] + rmat_t[:, 1, 0], rmat_t[:, 2, 0] + rmat_t[:, 0, 2]
|
172 |
+
], -1)
|
173 |
+
t0_rep = t0.repeat(4, 1).t()
|
174 |
+
|
175 |
+
t1 = 1 - rmat_t[:, 0, 0] + rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
|
176 |
+
q1 = torch.stack([
|
177 |
+
rmat_t[:, 2, 0] - rmat_t[:, 0, 2], rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
|
178 |
+
t1, rmat_t[:, 1, 2] + rmat_t[:, 2, 1]
|
179 |
+
], -1)
|
180 |
+
t1_rep = t1.repeat(4, 1).t()
|
181 |
+
|
182 |
+
t2 = 1 - rmat_t[:, 0, 0] - rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
|
183 |
+
q2 = torch.stack([
|
184 |
+
rmat_t[:, 0, 1] - rmat_t[:, 1, 0], rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
|
185 |
+
rmat_t[:, 1, 2] + rmat_t[:, 2, 1], t2
|
186 |
+
], -1)
|
187 |
+
t2_rep = t2.repeat(4, 1).t()
|
188 |
+
|
189 |
+
t3 = 1 + rmat_t[:, 0, 0] + rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
|
190 |
+
q3 = torch.stack([
|
191 |
+
t3, rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
|
192 |
+
rmat_t[:, 2, 0] - rmat_t[:, 0, 2], rmat_t[:, 0, 1] - rmat_t[:, 1, 0]
|
193 |
+
], -1)
|
194 |
+
t3_rep = t3.repeat(4, 1).t()
|
195 |
+
|
196 |
+
mask_c0 = mask_d2 * mask_d0_d1
|
197 |
+
mask_c1 = mask_d2 * ~mask_d0_d1
|
198 |
+
mask_c2 = ~mask_d2 * mask_d0_nd1
|
199 |
+
mask_c3 = ~mask_d2 * ~mask_d0_nd1
|
200 |
+
mask_c0 = mask_c0.view(-1, 1).type_as(q0)
|
201 |
+
mask_c1 = mask_c1.view(-1, 1).type_as(q1)
|
202 |
+
mask_c2 = mask_c2.view(-1, 1).type_as(q2)
|
203 |
+
mask_c3 = mask_c3.view(-1, 1).type_as(q3)
|
204 |
+
|
205 |
+
q = q0 * mask_c0 + q1 * mask_c1 + q2 * mask_c2 + q3 * mask_c3
|
206 |
+
q /= torch.sqrt(t0_rep * mask_c0 + t1_rep * mask_c1 + # noqa
|
207 |
+
t2_rep * mask_c2 + t3_rep * mask_c3) # noqa
|
208 |
+
q *= 0.5
|
209 |
+
return q
|
210 |
+
|
211 |
+
|
212 |
+
def rot6d_to_rotmat(x):
|
213 |
+
"""Convert 6D rotation representation to 3x3 rotation matrix.
|
214 |
+
Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019
|
215 |
+
Input:
|
216 |
+
(B,6) Batch of 6-D rotation representations
|
217 |
+
Output:
|
218 |
+
(B,3,3) Batch of corresponding rotation matrices
|
219 |
+
"""
|
220 |
+
x = x.view(-1, 3, 2)
|
221 |
+
a1 = x[:, :, 0]
|
222 |
+
a2 = x[:, :, 1]
|
223 |
+
b1 = F.normalize(a1)
|
224 |
+
b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1)
|
225 |
+
b3 = torch.cross(b1, b2)
|
226 |
+
return torch.stack((b1, b2, b3), dim=-1)
|
227 |
+
|
228 |
+
|
229 |
+
def projection(pred_joints, pred_camera, retain_z=False):
|
230 |
+
pred_cam_t = torch.stack([
|
231 |
+
pred_camera[:, 1], pred_camera[:, 2], 2 * 5000. /
|
232 |
+
(224. * pred_camera[:, 0] + 1e-9)
|
233 |
+
],
|
234 |
+
dim=-1)
|
235 |
+
batch_size = pred_joints.shape[0]
|
236 |
+
camera_center = torch.zeros(batch_size, 2)
|
237 |
+
pred_keypoints_2d = perspective_projection(
|
238 |
+
pred_joints,
|
239 |
+
rotation=torch.eye(3).unsqueeze(0).expand(batch_size, -1,
|
240 |
+
-1).to(pred_joints.device),
|
241 |
+
translation=pred_cam_t,
|
242 |
+
focal_length=5000.,
|
243 |
+
camera_center=camera_center,
|
244 |
+
retain_z=retain_z)
|
245 |
+
# Normalize keypoints to [-1,1]
|
246 |
+
pred_keypoints_2d = pred_keypoints_2d / (224. / 2.)
|
247 |
+
return pred_keypoints_2d
|
248 |
+
|
249 |
+
|
250 |
+
def perspective_projection(points,
|
251 |
+
rotation,
|
252 |
+
translation,
|
253 |
+
focal_length,
|
254 |
+
camera_center,
|
255 |
+
retain_z=False):
|
256 |
+
"""
|
257 |
+
This function computes the perspective projection of a set of points.
|
258 |
+
Input:
|
259 |
+
points (bs, N, 3): 3D points
|
260 |
+
rotation (bs, 3, 3): Camera rotation
|
261 |
+
translation (bs, 3): Camera translation
|
262 |
+
focal_length (bs,) or scalar: Focal length
|
263 |
+
camera_center (bs, 2): Camera center
|
264 |
+
"""
|
265 |
+
batch_size = points.shape[0]
|
266 |
+
K = torch.zeros([batch_size, 3, 3], device=points.device)
|
267 |
+
K[:, 0, 0] = focal_length
|
268 |
+
    K[:, 1, 1] = focal_length
    K[:, 2, 2] = 1.
    K[:, :-1, -1] = camera_center

    # Transform points
    points = torch.einsum('bij,bkj->bki', rotation, points)
    points = points + translation.unsqueeze(1)

    # Apply perspective distortion
    projected_points = points / points[:, :, -1].unsqueeze(-1)

    # Apply camera intrinsics
    projected_points = torch.einsum('bij,bkj->bki', K, projected_points)

    if retain_z:
        return projected_points
    else:
        return projected_points[:, :, :-1]


def estimate_translation_np(S,
                            joints_2d,
                            joints_conf,
                            focal_length=5000,
                            img_size=224):
    """Find the camera translation that brings the 3D joints S closest to
    their corresponding 2D detections joints_2d.
    Input:
        S: (25, 3) 3D joint locations
        joints_2d: (25, 2) 2D joint locations
        joints_conf: (25,) confidence of the 2D detections
    Returns:
        (3,) camera translation vector
    """

    num_joints = S.shape[0]
    # focal length
    f = np.array([focal_length, focal_length])
    # optical center
    center = np.array([img_size / 2., img_size / 2.])

    # transformations
    Z = np.reshape(np.tile(S[:, 2], (2, 1)).T, -1)
    XY = np.reshape(S[:, 0:2], -1)
    O = np.tile(center, num_joints)
    F = np.tile(f, num_joints)
    weight2 = np.reshape(np.tile(np.sqrt(joints_conf), (2, 1)).T, -1)

    # least squares
    Q = np.array([
        F * np.tile(np.array([1, 0]), num_joints),
        F * np.tile(np.array([0, 1]), num_joints),
        O - np.reshape(joints_2d, -1)
    ]).T
    c = (np.reshape(joints_2d, -1) - O) * Z - F * XY

    # weighted least squares
    W = np.diagflat(weight2)
    Q = np.dot(W, Q)
    c = np.dot(W, c)

    # square matrix
    A = np.dot(Q.T, Q)
    b = np.dot(Q.T, c)

    # solution
    trans = np.linalg.solve(A, b)

    return trans


def estimate_translation(S, joints_2d, focal_length=5000., img_size=224.):
    """Find the camera translation that brings the 3D joints S closest to
    their corresponding 2D detections joints_2d.
    Input:
        S: (B, 49, 3) 3D joint locations
        joints_2d: (B, 49, 3) 2D joint locations and confidence
    Returns:
        (B, 3) camera translation vectors
    """

    device = S.device
    # Use only joints 25:49 (GT joints)
    S = S[:, 25:, :].cpu().numpy()
    joints_2d = joints_2d[:, 25:, :].cpu().numpy()
    joints_conf = joints_2d[:, :, -1]
    joints_2d = joints_2d[:, :, :-1]
    trans = np.zeros((S.shape[0], 3), dtype=np.float32)
    # Find the translation for each example in the batch
    for i in range(S.shape[0]):
        S_i = S[i]
        joints_i = joints_2d[i]
        conf_i = joints_conf[i]
        trans[i] = estimate_translation_np(S_i,
                                           joints_i,
                                           conf_i,
                                           focal_length=focal_length,
                                           img_size=img_size)
    return torch.from_numpy(trans).to(device)


def Rot_y(angle, category='torch', prepend_dim=True, device=None):
    '''Rotate around y-axis by angle
    Args:
        category: 'torch' or 'numpy'
        prepend_dim: prepend an extra dimension
    Return: Rotation matrix with shape [1, 3, 3] (prepend_dim=True)
    '''
    m = np.array([[np.cos(angle), 0., np.sin(angle)], [0., 1., 0.],
                  [-np.sin(angle), 0., np.cos(angle)]])
    if category == 'torch':
        if prepend_dim:
            return torch.tensor(m, dtype=torch.float,
                                device=device).unsqueeze(0)
        else:
            return torch.tensor(m, dtype=torch.float, device=device)
    elif category == 'numpy':
        if prepend_dim:
            return np.expand_dims(m, 0)
        else:
            return m
    else:
        raise ValueError("category must be 'torch' or 'numpy'")


def Rot_x(angle, category='torch', prepend_dim=True, device=None):
    '''Rotate around x-axis by angle
    Args:
        category: 'torch' or 'numpy'
        prepend_dim: prepend an extra dimension
    Return: Rotation matrix with shape [1, 3, 3] (prepend_dim=True)
    '''
    m = np.array([[1., 0., 0.], [0., np.cos(angle), -np.sin(angle)],
                  [0., np.sin(angle), np.cos(angle)]])
    if category == 'torch':
        if prepend_dim:
            return torch.tensor(m, dtype=torch.float,
                                device=device).unsqueeze(0)
        else:
            return torch.tensor(m, dtype=torch.float, device=device)
    elif category == 'numpy':
        if prepend_dim:
            return np.expand_dims(m, 0)
        else:
            return m
    else:
        raise ValueError("category must be 'torch' or 'numpy'")


def Rot_z(angle, category='torch', prepend_dim=True, device=None):
    '''Rotate around z-axis by angle
    Args:
        category: 'torch' or 'numpy'
        prepend_dim: prepend an extra dimension
    Return: Rotation matrix with shape [1, 3, 3] (prepend_dim=True)
    '''
    m = np.array([[np.cos(angle), -np.sin(angle), 0.],
                  [np.sin(angle), np.cos(angle), 0.], [0., 0., 1.]])
    if category == 'torch':
        if prepend_dim:
            return torch.tensor(m, dtype=torch.float,
                                device=device).unsqueeze(0)
        else:
            return torch.tensor(m, dtype=torch.float, device=device)
    elif category == 'numpy':
        if prepend_dim:
            return np.expand_dims(m, 0)
        else:
            return m
    else:
        raise ValueError("category must be 'torch' or 'numpy'")
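A minimal sanity-check sketch, not part of the commit: it assumes estimate_translation_np above is importable, synthesizes 3D joints, projects them with a made-up ground-truth translation through the same pinhole model the least-squares system encodes, and checks that the solve recovers that translation.

import numpy as np

rng = np.random.default_rng(0)
S = rng.uniform(-0.5, 0.5, size=(25, 3))        # hypothetical 3D joints around the origin
gt_trans = np.array([0.1, -0.05, 8.0])          # hypothetical camera translation
f, c = 5000.0, 224 / 2.0                        # defaults used by the function

cam = S + gt_trans                              # joints in the camera frame
joints_2d = f * cam[:, :2] / cam[:, 2:3] + c    # pinhole projection onto the image
joints_conf = np.ones(25)                       # treat every joint as fully confident

est = estimate_translation_np(S, joints_2d, joints_conf,
                              focal_length=f, img_size=224)
print(np.allclose(est, gt_trans, atol=1e-4))    # expected: True on noise-free data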
lib/pymaf/utils/imutils.py
ADDED
@@ -0,0 +1,491 @@
"""
This file contains functions that are used to perform data augmentation.
"""
import cv2
import io
import torch
import numpy as np
from PIL import Image
from rembg import remove
from rembg.session_factory import new_session
from torchvision.models import detection

from lib.pymaf.core import constants
from lib.pymaf.utils.streamer import aug_matrix
from lib.common.cloth_extraction import load_segmentation
from torchvision import transforms


def load_img(img_file):

    img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)
    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    if not img_file.endswith("png"):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    else:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)

    return img


def get_bbox(img, det):

    input = np.float32(img)
    input = (input / 255.0 -
             (0.5, 0.5, 0.5)) / (0.5, 0.5, 0.5)  # TO [-1.0, 1.0]
    input = input.transpose(2, 0, 1)  # TO [3 x H x W]
    bboxes, probs = det(torch.from_numpy(input).float().unsqueeze(0))

    probs = probs.unsqueeze(3)
    bboxes = (bboxes * probs).sum(dim=1, keepdim=True) / probs.sum(
        dim=1, keepdim=True)
    bbox = bboxes[0, 0, 0].cpu().numpy()

    return bbox


def get_transformer(input_res):

    image_to_tensor = transforms.Compose([
        transforms.Resize(input_res),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    mask_to_tensor = transforms.Compose([
        transforms.Resize(input_res),
        transforms.ToTensor(),
        transforms.Normalize((0.0, ), (1.0, ))
    ])

    image_to_pymaf_tensor = transforms.Compose([
        transforms.Resize(size=224),
        transforms.Normalize(mean=constants.IMG_NORM_MEAN,
                             std=constants.IMG_NORM_STD)
    ])

    image_to_pixie_tensor = transforms.Compose([
        transforms.Resize(224)
    ])

    def image_to_hybrik_tensor(img):
        # mean
        img[0].add_(-0.406)
        img[1].add_(-0.457)
        img[2].add_(-0.480)

        # std
        img[0].div_(0.225)
        img[1].div_(0.224)
        img[2].div_(0.229)
        return img

    return [image_to_tensor, mask_to_tensor, image_to_pymaf_tensor, image_to_pixie_tensor, image_to_hybrik_tensor]


def process_image(img_file, hps_type, input_res=512, device=None, seg_path=None):
    """Read image, do preprocessing and possibly crop it according to the bounding box.
    If there are bounding box annotations, use them to crop the image.
    If no bounding box is specified but openpose detections are available, use them to get the bounding box.
    """

    [image_to_tensor, mask_to_tensor, image_to_pymaf_tensor,
     image_to_pixie_tensor, image_to_hybrik_tensor] = get_transformer(input_res)

    img_ori = load_img(img_file)

    in_height, in_width, _ = img_ori.shape
    M = aug_matrix(in_width, in_height, input_res*2, input_res*2)

    # from rectangle to square
    img_for_crop = cv2.warpAffine(img_ori, M[0:2, :],
                                  (input_res*2, input_res*2), flags=cv2.INTER_CUBIC)

    # detection for bbox
    detector = detection.maskrcnn_resnet50_fpn(pretrained=True)
    detector.eval()
    predictions = detector(
        [torch.from_numpy(img_for_crop).permute(2, 0, 1) / 255.])[0]
    human_ids = torch.where(
        predictions["scores"] == predictions["scores"][predictions['labels'] == 1].max())
    bbox = predictions["boxes"][human_ids, :].flatten().detach().cpu().numpy()

    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    center = np.array([(bbox[0] + bbox[2]) / 2.0,
                       (bbox[1] + bbox[3]) / 2.0])

    scale = max(height, width) / 180

    if hps_type == 'hybrik':
        img_np = crop_for_hybrik(img_for_crop, center,
                                 np.array([scale * 180, scale * 180]))
    else:
        img_np, cropping_parameters = crop(
            img_for_crop, center, scale, (input_res, input_res))

    img_pil = Image.fromarray(remove(img_np, post_process_mask=True, session=new_session("u2net")))

    # for icon
    img_rgb = image_to_tensor(img_pil.convert("RGB"))
    img_mask = torch.tensor(1.0) - (mask_to_tensor(img_pil.split()[-1]) <
                                    torch.tensor(0.5)).float()
    img_tensor = img_rgb * img_mask

    # for hps
    img_hps = img_np.astype(np.float32) / 255.
    img_hps = torch.from_numpy(img_hps).permute(2, 0, 1)

    if hps_type == 'bev':
        img_hps = img_np[:, :, [2, 1, 0]]
    elif hps_type == 'hybrik':
        img_hps = image_to_hybrik_tensor(img_hps).unsqueeze(0).to(device)
    elif hps_type != 'pixie':
        img_hps = image_to_pymaf_tensor(img_hps).unsqueeze(0).to(device)
    else:
        img_hps = image_to_pixie_tensor(img_hps).unsqueeze(0).to(device)

    # uncrop params
    uncrop_param = {'center': center,
                    'scale': scale,
                    'ori_shape': img_ori.shape,
                    'box_shape': img_np.shape,
                    'crop_shape': img_for_crop.shape,
                    'M': M}

    if seg_path is not None:
        segmentations = load_segmentation(seg_path, (in_height, in_width))
        seg_coord_normalized = []
        for seg in segmentations:
            coord_normalized = []
            for xy in seg['coordinates']:
                xy_h = np.vstack((xy[:, 0], xy[:, 1], np.ones(len(xy)))).T
                warped_indeces = M[0:2, :] @ xy_h[:, :, None]
                warped_indeces = np.array(warped_indeces).astype(int)
                warped_indeces.resize((warped_indeces.shape[:2]))

                # cropped_indeces = crop_segmentation(warped_indeces, center, scale, (input_res, input_res), img_np.shape)
                cropped_indeces = crop_segmentation(
                    warped_indeces, (input_res, input_res), cropping_parameters)

                indices = np.vstack(
                    (cropped_indeces[:, 0], cropped_indeces[:, 1])).T

                # Convert to NDC coordinates
                seg_cropped_normalized = 2*(indices / input_res) - 1
                # Don't know why we need to divide by 50 but it works ¯\_(ツ)_/¯ (probably some scaling factor somewhere)
                # Divide only by 45 on the horizontal axis to take the curve of the human body into account
                seg_cropped_normalized[:, 0] = (
                    1/40) * seg_cropped_normalized[:, 0]
                seg_cropped_normalized[:, 1] = (
                    1/50) * seg_cropped_normalized[:, 1]
                coord_normalized.append(seg_cropped_normalized)

            seg['coord_normalized'] = coord_normalized
            seg_coord_normalized.append(seg)

        return img_tensor, img_hps, img_ori, img_mask, uncrop_param, seg_coord_normalized

    return img_tensor, img_hps, img_ori, img_mask, uncrop_param


def get_transform(center, scale, res):
    """Generate transformation matrix."""
    h = 200 * scale
    t = np.zeros((3, 3))
    t[0, 0] = float(res[1]) / h
    t[1, 1] = float(res[0]) / h
    t[0, 2] = res[1] * (-float(center[0]) / h + .5)
    t[1, 2] = res[0] * (-float(center[1]) / h + .5)
    t[2, 2] = 1

    return t


def transform(pt, center, scale, res, invert=0):
    """Transform pixel location to different reference."""
    t = get_transform(center, scale, res)
    if invert:
        t = np.linalg.inv(t)
    new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.]).T
    new_pt = np.dot(t, new_pt)
    return np.around(new_pt[:2]).astype(np.int16)


def crop(img, center, scale, res):
    """Crop image according to the supplied bounding box."""

    # Upper left point
    ul = np.array(transform([0, 0], center, scale, res, invert=1))

    # Bottom right point
    br = np.array(transform(res, center, scale, res, invert=1))

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape)

    # Range to fill new array
    new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]

    # Range to sample from original image
    old_x = max(0, ul[0]), min(len(img[0]), br[0])
    old_y = max(0, ul[1]), min(len(img), br[1])

    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]
            ] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]]
    if len(img.shape) == 2:
        new_img = np.array(Image.fromarray(new_img).resize(res))
    else:
        new_img = np.array(Image.fromarray(
            new_img.astype(np.uint8)).resize(res))

    return new_img, (old_x, new_x, old_y, new_y, new_shape)


def crop_segmentation(org_coord, res, cropping_parameters):
    old_x, new_x, old_y, new_y, new_shape = cropping_parameters

    new_coord = np.zeros((org_coord.shape))
    new_coord[:, 0] = new_x[0] + (org_coord[:, 0] - old_x[0])
    new_coord[:, 1] = new_y[0] + (org_coord[:, 1] - old_y[0])

    new_coord[:, 0] = res[0] * (new_coord[:, 0] / new_shape[1])
    new_coord[:, 1] = res[1] * (new_coord[:, 1] / new_shape[0])

    return new_coord


def crop_for_hybrik(img, center, scale):
    inp_h, inp_w = (256, 256)
    trans = get_affine_transform(center, scale, 0, [inp_w, inp_h])
    new_img = cv2.warpAffine(
        img, trans, (int(inp_w), int(inp_h)), flags=cv2.INTER_LINEAR)
    return new_img


def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0):

    def get_dir(src_point, rot_rad):
        """Rotate the point by `rot_rad` degree."""
        sn, cs = np.sin(rot_rad), np.cos(rot_rad)

        src_result = [0, 0]
        src_result[0] = src_point[0] * cs - src_point[1] * sn
        src_result[1] = src_point[0] * sn + src_point[1] * cs

        return src_result

    def get_3rd_point(a, b):
        """Return a vector c that is perpendicular to (a - b)."""
        direct = a - b
        return b + np.array([-direct[1], direct[0]], dtype=np.float32)

    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale])

    scale_tmp = scale
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def corner_align(ul, br):

    if ul[1]-ul[0] != br[1]-br[0]:
        ul[1] = ul[0]+br[1]-br[0]

    return ul, br


def uncrop(img, center, scale, orig_shape):
    """'Undo' the image cropping/resizing.
    This function is used when evaluating mask/part segmentation.
    """

    res = img.shape[:2]

    # Upper left point
    ul = np.array(transform([0, 0], center, scale, res, invert=1))
    # Bottom right point
    br = np.array(transform(res, center, scale, res, invert=1))

    # quick fix
    ul, br = corner_align(ul, br)

    # size of cropped image
    crop_shape = [br[1] - ul[1], br[0] - ul[0]]
    new_img = np.zeros(orig_shape, dtype=np.uint8)

    # Range to fill new array
    new_x = max(0, -ul[0]), min(br[0], orig_shape[1]) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], orig_shape[0]) - ul[1]

    # Range to sample from original image
    old_x = max(0, ul[0]), min(orig_shape[1], br[0])
    old_y = max(0, ul[1]), min(orig_shape[0], br[1])

    img = np.array(Image.fromarray(img.astype(np.uint8)).resize(crop_shape))

    new_img[old_y[0]:old_y[1], old_x[0]:old_x[1]
            ] = img[new_y[0]:new_y[1], new_x[0]:new_x[1]]

    return new_img


def rot_aa(aa, rot):
    """Rotate axis angle parameters."""
    # pose parameters
    R = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                  [np.sin(np.deg2rad(-rot)),
                   np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]])
    # find the rotation of the body in camera frame
    per_rdg, _ = cv2.Rodrigues(aa)
    # apply the global rotation to the global orientation
    resrot, _ = cv2.Rodrigues(np.dot(R, per_rdg))
    aa = (resrot.T)[0]
    return aa


def flip_img(img):
    """Flip rgb images or masks.
    channels come last, e.g. (256,256,3).
    """
    img = np.fliplr(img)
    return img


def flip_kp(kp, is_smpl=False):
    """Flip keypoints."""
    if len(kp) == 24:
        if is_smpl:
            flipped_parts = constants.SMPL_JOINTS_FLIP_PERM
        else:
            flipped_parts = constants.J24_FLIP_PERM
    elif len(kp) == 49:
        if is_smpl:
            flipped_parts = constants.SMPL_J49_FLIP_PERM
        else:
            flipped_parts = constants.J49_FLIP_PERM
    kp = kp[flipped_parts]
    kp[:, 0] = -kp[:, 0]
    return kp


def flip_pose(pose):
    """Flip pose.
    The flipping is based on SMPL parameters.
    """
    flipped_parts = constants.SMPL_POSE_FLIP_PERM
    pose = pose[flipped_parts]
    # we also negate the second and the third dimension of the axis-angle
    pose[1::3] = -pose[1::3]
    pose[2::3] = -pose[2::3]
    return pose


def normalize_2d_kp(kp_2d, crop_size=224, inv=False):
    # Normalize keypoints between -1, 1
    if not inv:
        ratio = 1.0 / crop_size
        kp_2d = 2.0 * kp_2d * ratio - 1.0
    else:
        ratio = 1.0 / crop_size
        kp_2d = (kp_2d + 1.0) / (2 * ratio)

    return kp_2d


def generate_heatmap(joints, heatmap_size, sigma=1, joints_vis=None):
    '''
    param joints: [num_joints, 3]
    param joints_vis: [num_joints, 3]
    return: target, target_weight (1: visible, 0: invisible)
    '''
    num_joints = joints.shape[0]
    device = joints.device
    cur_device = torch.device(device.type, device.index)
    if not hasattr(heatmap_size, '__len__'):
        # width, height
        heatmap_size = [heatmap_size, heatmap_size]
    assert len(heatmap_size) == 2
    target_weight = np.ones((num_joints, 1), dtype=np.float32)
    if joints_vis is not None:
        target_weight[:, 0] = joints_vis[:, 0]
    target = torch.zeros((num_joints, heatmap_size[1], heatmap_size[0]),
                         dtype=torch.float32,
                         device=cur_device)

    tmp_size = sigma * 3

    for joint_id in range(num_joints):
        mu_x = int(joints[joint_id][0] * heatmap_size[0] + 0.5)
        mu_y = int(joints[joint_id][1] * heatmap_size[1] + 0.5)
        # Check that any part of the gaussian is in-bounds
        ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
        br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
        if ul[0] >= heatmap_size[0] or ul[1] >= heatmap_size[1] \
                or br[0] < 0 or br[1] < 0:
            # If not, just return the image as is
            target_weight[joint_id] = 0
            continue

        # Generate gaussian
        size = 2 * tmp_size + 1
        # x = np.arange(0, size, 1, np.float32)
        # y = x[:, np.newaxis]
        # x0 = y0 = size // 2
        # # The gaussian is not normalized, we want the center value to equal 1
        # g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
        # g = torch.from_numpy(g.astype(np.float32))

        x = torch.arange(0, size, dtype=torch.float32, device=cur_device)
        y = x.unsqueeze(-1)
        x0 = y0 = size // 2
        # The gaussian is not normalized, we want the center value to equal 1
        g = torch.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

        # Usable gaussian range
        g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0]
        g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1]
        # Image range
        img_x = max(0, ul[0]), min(br[0], heatmap_size[0])
        img_y = max(0, ul[1]), min(br[1], heatmap_size[1])

        v = target_weight[joint_id]
        if v > 0.5:
            target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                g[g_y[0]:g_y[1], g_x[0]:g_x[1]]

    return target, target_weight
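A small round-trip sketch, not part of the file: the values are made up, and it only assumes the transform() helper defined above. A point at the bounding-box center should land near the middle of the crop, and the inverted transform should map it back close to where it started.

import numpy as np

center = np.array([320.0, 240.0])   # hypothetical bbox center in the source image
scale = 1.2                         # bbox side is 200 * scale pixels
res = (512, 512)                    # crop resolution

pt_crop = transform(center, center, scale, res)             # roughly [256, 256]
pt_back = transform(pt_crop, center, scale, res, invert=1)  # roughly the original center
print(pt_crop, pt_back)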
lib/pymaf/utils/streamer.py
ADDED
@@ -0,0 +1,142 @@
import cv2
import torch
import numpy as np
import imageio


def aug_matrix(w1, h1, w2, h2):
    dx = (w2 - w1) / 2.0
    dy = (h2 - h1) / 2.0

    matrix_trans = np.array([[1.0, 0, dx],
                             [0, 1.0, dy],
                             [0, 0, 1.0]])

    scale = np.min([float(w2)/w1, float(h2)/h1])

    M = get_affine_matrix(
        center=(w2 / 2.0, h2 / 2.0),
        translate=(0, 0),
        scale=scale)

    M = np.array(M + [0., 0., 1.]).reshape(3, 3)
    M = M.dot(matrix_trans)

    return M


def get_affine_matrix(center, translate, scale):
    cx, cy = center
    tx, ty = translate

    M = [1, 0, 0,
         0, 1, 0]
    M = [x * scale for x in M]

    # Apply inverse of center translation: RSS * C^-1
    M[2] += M[0] * (-cx) + M[1] * (-cy)
    M[5] += M[3] * (-cx) + M[4] * (-cy)

    # Apply center translation: T * C * RSS * C^-1
    M[2] += cx + tx
    M[5] += cy + ty
    return M


class BaseStreamer():
    """This streamer will return images at 512x512 size.
    """

    def __init__(self,
                 width=512, height=512, pad=True,
                 mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
                 **kwargs):
        self.width = width
        self.height = height
        self.pad = pad
        self.mean = np.array(mean)
        self.std = np.array(std)

        self.loader = self.create_loader()

    def create_loader(self):
        raise NotImplementedError
        yield np.zeros((600, 400, 3))  # in RGB (0, 255)

    def __getitem__(self, index):
        image = next(self.loader)
        in_height, in_width, _ = image.shape
        M = aug_matrix(in_width, in_height, self.width, self.height)
        image = cv2.warpAffine(
            image, M[0:2, :], (self.width, self.height), flags=cv2.INTER_CUBIC)

        input = np.float32(image)
        input = (input / 255.0 - self.mean) / self.std  # TO [-1.0, 1.0]
        input = input.transpose(2, 0, 1)  # TO [3 x H x W]
        return torch.from_numpy(input).float()

    def __len__(self):
        raise NotImplementedError


class CaptureStreamer(BaseStreamer):
    """This streamer takes webcam as input.
    """

    def __init__(self, id=0, width=512, height=512, pad=True, **kwargs):
        super().__init__(width, height, pad, **kwargs)
        self.capture = cv2.VideoCapture(id)

    def create_loader(self):
        while True:
            _, image = self.capture.read()
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # RGB
            yield image

    def __len__(self):
        return 100_000_000

    def __del__(self):
        self.capture.release()


class VideoListStreamer(BaseStreamer):
    """This streamer takes a list of video files as input.
    """

    def __init__(self, files, width=512, height=512, pad=True, **kwargs):
        super().__init__(width, height, pad, **kwargs)
        self.files = files
        self.captures = [imageio.get_reader(f) for f in files]
        self.nframes = sum([int(cap._meta["fps"] * cap._meta["duration"])
                            for cap in self.captures])

    def create_loader(self):
        for capture in self.captures:
            for image in capture:  # RGB
                yield image

    def __len__(self):
        return self.nframes

    def __del__(self):
        for capture in self.captures:
            capture.close()


class ImageListStreamer(BaseStreamer):
    """This streamer takes a list of image files as input.
    """

    def __init__(self, files, width=512, height=512, pad=True, **kwargs):
        super().__init__(width, height, pad, **kwargs)
        self.files = files

    def create_loader(self):
        for f in self.files:
            image = cv2.imread(f, cv2.IMREAD_UNCHANGED)[:, :, 0:3]
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # RGB
            yield image

    def __len__(self):
        return len(self.files)
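A hypothetical usage sketch, not part of the file; the glob pattern is made up. ImageListStreamer yields normalized 512x512 tensors in [-1, 1]; note that __getitem__ ignores the index and simply advances the underlying generator.

import glob

files = sorted(glob.glob("./some_images/*.png"))   # any list of image paths (assumed to exist)
streamer = ImageListStreamer(files)
for i in range(len(streamer)):
    tensor = streamer[i]            # torch.FloatTensor of shape [3, 512, 512]
    print(i, tuple(tensor.shape))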
lib/pymaf/utils/transforms.py
ADDED
@@ -0,0 +1,78 @@
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao ([email protected])
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import numpy as np


def transform_preds(coords, center, scale, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        # print(scale)
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result
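A small self-check sketch, not part of the file, using only the functions above with made-up values: the forward transform and the transform built with inv=1 are exact inverses, so mapping a point into the output space and back should recover it.

import numpy as np

center = np.array([128.0, 128.0], dtype=np.float32)
scale = 1.0                       # interpreted as a 200 * scale pixel box
output_size = (64, 64)

t_fwd = get_affine_transform(center, scale, 0, output_size)
t_inv = get_affine_transform(center, scale, 0, output_size, inv=1)

pt = np.array([140.0, 100.0])
pt_out = affine_transform(pt, t_fwd)     # source image -> 64x64 output space
pt_back = affine_transform(pt_out, t_inv)
print(np.allclose(pt, pt_back, atol=1e-3))   # expected: True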
lib/renderer/__init__.py
ADDED
File without changes
lib/renderer/camera.py
ADDED
@@ -0,0 +1,226 @@

# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: [email protected]

import cv2
import numpy as np

from .glm import ortho


class Camera:
    def __init__(self, width=1600, height=1200):
        # Focal Length
        # equivalent 50mm
        focal = np.sqrt(width * width + height * height)
        self.focal_x = focal
        self.focal_y = focal
        # Principal Point Offset
        self.principal_x = width / 2
        self.principal_y = height / 2
        # Axis Skew
        self.skew = 0
        # Image Size
        self.width = width
        self.height = height

        self.near = 1
        self.far = 10

        # Camera Center
        self.center = np.array([0, 0, 1.6])
        self.direction = np.array([0, 0, -1])
        self.right = np.array([1, 0, 0])
        self.up = np.array([0, 1, 0])

        self.ortho_ratio = None

    def sanity_check(self):
        self.center = self.center.reshape([-1])
        self.direction = self.direction.reshape([-1])
        self.right = self.right.reshape([-1])
        self.up = self.up.reshape([-1])

        assert len(self.center) == 3
        assert len(self.direction) == 3
        assert len(self.right) == 3
        assert len(self.up) == 3

    @staticmethod
    def normalize_vector(v):
        v_norm = np.linalg.norm(v)
        return v if v_norm == 0 else v / v_norm

    def get_real_z_value(self, z):
        z_near = self.near
        z_far = self.far
        z_n = 2.0 * z - 1.0
        z_e = 2.0 * z_near * z_far / (z_far + z_near - z_n * (z_far - z_near))
        return z_e

    def get_rotation_matrix(self):
        rot_mat = np.eye(3)
        s = self.right
        s = self.normalize_vector(s)
        rot_mat[0, :] = s
        u = self.up
        u = self.normalize_vector(u)
        rot_mat[1, :] = -u
        rot_mat[2, :] = self.normalize_vector(self.direction)

        return rot_mat

    def get_translation_vector(self):
        rot_mat = self.get_rotation_matrix()
        trans = -np.dot(rot_mat, self.center)
        return trans

    def get_intrinsic_matrix(self):
        int_mat = np.eye(3)

        int_mat[0, 0] = self.focal_x
        int_mat[1, 1] = self.focal_y
        int_mat[0, 1] = self.skew
        int_mat[0, 2] = self.principal_x
        int_mat[1, 2] = self.principal_y

        return int_mat

    def get_projection_matrix(self):
        ext_mat = self.get_extrinsic_matrix()
        int_mat = self.get_intrinsic_matrix()

        return np.matmul(int_mat, ext_mat)

    def get_extrinsic_matrix(self):
        rot_mat = self.get_rotation_matrix()
        int_mat = self.get_intrinsic_matrix()
        trans = self.get_translation_vector()

        extrinsic = np.eye(4)
        extrinsic[:3, :3] = rot_mat
        extrinsic[:3, 3] = trans

        return extrinsic[:3, :]

    def set_rotation_matrix(self, rot_mat):
        self.direction = rot_mat[2, :]
        self.up = -rot_mat[1, :]
        self.right = rot_mat[0, :]

    def set_intrinsic_matrix(self, int_mat):
        self.focal_x = int_mat[0, 0]
        self.focal_y = int_mat[1, 1]
        self.skew = int_mat[0, 1]
        self.principal_x = int_mat[0, 2]
        self.principal_y = int_mat[1, 2]

    def set_projection_matrix(self, proj_mat):
        res = cv2.decomposeProjectionMatrix(proj_mat)
        int_mat, rot_mat, camera_center_homo = res[0], res[1], res[2]
        camera_center = camera_center_homo[0:3] / camera_center_homo[3]
        camera_center = camera_center.reshape(-1)
        int_mat = int_mat / int_mat[2][2]

        self.set_intrinsic_matrix(int_mat)
        self.set_rotation_matrix(rot_mat)
        self.center = camera_center

        self.sanity_check()

    def get_gl_matrix(self):
        z_near = self.near
        z_far = self.far
        rot_mat = self.get_rotation_matrix()
        int_mat = self.get_intrinsic_matrix()
        trans = self.get_translation_vector()

        extrinsic = np.eye(4)
        extrinsic[:3, :3] = rot_mat
        extrinsic[:3, 3] = trans
        axis_adj = np.eye(4)
        axis_adj[2, 2] = -1
        axis_adj[1, 1] = -1
        model_view = np.matmul(axis_adj, extrinsic)

        projective = np.zeros([4, 4])
        projective[:2, :2] = int_mat[:2, :2]
        projective[:2, 2:3] = -int_mat[:2, 2:3]
        projective[3, 2] = -1
        projective[2, 2] = (z_near + z_far)
        projective[2, 3] = (z_near * z_far)

        if self.ortho_ratio is None:
            ndc = ortho(0, self.width, 0, self.height, z_near, z_far)
            perspective = np.matmul(ndc, projective)
        else:
            perspective = ortho(-self.width * self.ortho_ratio / 2,
                                self.width * self.ortho_ratio / 2,
                                -self.height * self.ortho_ratio / 2,
                                self.height * self.ortho_ratio / 2, z_near,
                                z_far)

        return perspective, model_view


def KRT_from_P(proj_mat, normalize_K=True):
    res = cv2.decomposeProjectionMatrix(proj_mat)
    K, Rot, camera_center_homog = res[0], res[1], res[2]
    camera_center = camera_center_homog[0:3] / camera_center_homog[3]
    trans = -Rot.dot(camera_center)
    if normalize_K:
        K = K / K[2][2]
    return K, Rot, trans


def MVP_from_P(proj_mat, width, height, near=0.1, far=10000):
    '''
    Convert OpenCV camera calibration matrix to OpenGL projection and model view matrix
    :param proj_mat: OpenCV camera projection matrix
    :param width: Image width
    :param height: Image height
    :param near: Z near value
    :param far: Z far value
    :return: OpenGL projection matrix and model view matrix
    '''
    res = cv2.decomposeProjectionMatrix(proj_mat)
    K, Rot, camera_center_homog = res[0], res[1], res[2]
    camera_center = camera_center_homog[0:3] / camera_center_homog[3]
    trans = -Rot.dot(camera_center)
    K = K / K[2][2]

    extrinsic = np.eye(4)
    extrinsic[:3, :3] = Rot
    extrinsic[:3, 3:4] = trans
    axis_adj = np.eye(4)
    axis_adj[2, 2] = -1
    axis_adj[1, 1] = -1
    model_view = np.matmul(axis_adj, extrinsic)

    zFar = far
    zNear = near
    projective = np.zeros([4, 4])
    projective[:2, :2] = K[:2, :2]
    projective[:2, 2:3] = -K[:2, 2:3]
    projective[3, 2] = -1
    projective[2, 2] = (zNear + zFar)
    projective[2, 3] = (zNear * zFar)

    ndc = ortho(0, width, 0, height, zNear, zFar)

    perspective = np.matmul(ndc, projective)

    return perspective, model_view
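A hedged usage sketch, not part of the file: it assumes the Camera class is importable from lib.renderer.camera (which in turn needs lib/renderer/glm.py for ortho), and the ortho_ratio value is a made-up choice just to exercise the orthographic branch of get_gl_matrix.

import numpy as np
from lib.renderer.camera import Camera   # assumed import path

cam = Camera(width=512, height=512)
cam.ortho_ratio = 0.4                     # hypothetical orthographic scaling
cam.center = np.array([0, 0, 1.6])        # camera 1.6 units in front of the subject

perspective, model_view = cam.get_gl_matrix()
print(perspective.shape, model_view.shape)   # (4, 4) (4, 4)

# A homogeneous world-space point p (4,) is projected as:
# p_clip = perspective @ model_view @ p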
lib/renderer/gl/__init__.py
ADDED
File without changes
lib/renderer/gl/data/color.fs
ADDED
@@ -0,0 +1,20 @@
#version 330 core

layout (location = 0) out vec4 FragColor;
layout (location = 1) out vec4 FragNormal;
layout (location = 2) out vec4 FragDepth;

in vec3 Color;
in vec3 CamNormal;
in vec3 depth;


void main()
{
    FragColor = vec4(Color, 1.0);

    vec3 cam_norm_normalized = normalize(CamNormal);
    vec3 rgb = (cam_norm_normalized + 1.0) / 2.0;
    FragNormal = vec4(rgb, 1.0);
    FragDepth = vec4(depth.xyz, 1.0);
}
lib/renderer/gl/data/color.vs
ADDED
@@ -0,0 +1,29 @@
#version 330 core

layout (location = 0) in vec3 a_Position;
layout (location = 1) in vec3 a_Color;
layout (location = 2) in vec3 a_Normal;

out vec3 CamNormal;
out vec3 CamPos;
out vec3 Color;
out vec3 depth;


uniform mat3 RotMat;
uniform mat4 NormMat;
uniform mat4 ModelMat;
uniform mat4 PerspMat;

void main()
{
    vec3 a_Position = (NormMat * vec4(a_Position, 1.0)).xyz;
    gl_Position = PerspMat * ModelMat * vec4(RotMat * a_Position, 1.0);
    Color = a_Color;

    mat3 R = mat3(ModelMat) * RotMat;
    CamNormal = (R * a_Normal);

    depth = vec3(gl_Position.z / gl_Position.w);
}
lib/renderer/gl/data/normal.fs
ADDED
@@ -0,0 +1,12 @@
#version 330

out vec4 FragColor;

in vec3 CamNormal;

void main()
{
    vec3 cam_norm_normalized = normalize(CamNormal);
    vec3 rgb = (cam_norm_normalized + 1.0) / 2.0;
    FragColor = vec4(rgb, 1.0);
}
lib/renderer/gl/data/normal.vs
ADDED
@@ -0,0 +1,15 @@
#version 330

layout (location = 0) in vec3 Position;
layout (location = 1) in vec3 Normal;

out vec3 CamNormal;

uniform mat4 ModelMat;
uniform mat4 PerspMat;

void main()
{
    gl_Position = PerspMat * ModelMat * vec4(Position, 1.0);
    CamNormal = (ModelMat * vec4(Normal, 0.0)).xyz;
}
lib/renderer/gl/data/prt.fs
ADDED
@@ -0,0 +1,157 @@
#version 330

uniform vec3 SHCoeffs[9];
uniform uint analytic;

uniform uint hasNormalMap;
uniform uint hasAlbedoMap;

uniform sampler2D AlbedoMap;
uniform sampler2D NormalMap;

in VertexData {
    vec3 Position;
    vec3 Depth;
    vec3 ModelNormal;
    vec2 Texcoord;
    vec3 Tangent;
    vec3 Bitangent;
    vec3 PRT1;
    vec3 PRT2;
    vec3 PRT3;
    vec3 Label;
} VertexIn;

layout (location = 0) out vec4 FragColor;
layout (location = 1) out vec4 FragNormal;
layout (location = 2) out vec4 FragPosition;
layout (location = 3) out vec4 FragAlbedo;
layout (location = 4) out vec4 FragShading;
layout (location = 5) out vec4 FragPRT1;
layout (location = 6) out vec4 FragPRT2;
// layout (location = 7) out vec4 FragPRT3;
layout (location = 7) out vec4 FragLabel;


vec4 gammaCorrection(vec4 vec, float g)
{
    return vec4(pow(vec.x, 1.0/g), pow(vec.y, 1.0/g), pow(vec.z, 1.0/g), vec.w);
}

vec3 gammaCorrection(vec3 vec, float g)
{
    return vec3(pow(vec.x, 1.0/g), pow(vec.y, 1.0/g), pow(vec.z, 1.0/g));
}

void evaluateH(vec3 n, out float H[9])
{
    float c1 = 0.429043, c2 = 0.511664,
        c3 = 0.743125, c4 = 0.886227, c5 = 0.247708;

    H[0] = c4;
    H[1] = 2.0 * c2 * n[1];
    H[2] = 2.0 * c2 * n[2];
    H[3] = 2.0 * c2 * n[0];
    H[4] = 2.0 * c1 * n[0] * n[1];
    H[5] = 2.0 * c1 * n[1] * n[2];
    H[6] = c3 * n[2] * n[2] - c5;
    H[7] = 2.0 * c1 * n[2] * n[0];
    H[8] = c1 * (n[0] * n[0] - n[1] * n[1]);
}

vec3 evaluateLightingModel(vec3 normal)
{
    float H[9];
    evaluateH(normal, H);
    vec3 res = vec3(0.0);
    for (int i = 0; i < 9; i++) {
        res += H[i] * SHCoeffs[i];
    }
    return res;
}

// nC: coarse geometry normal, nH: fine normal from normal map
vec3 evaluateLightingModelHybrid(vec3 nC, vec3 nH, mat3 prt)
{
    float HC[9], HH[9];
    evaluateH(nC, HC);
    evaluateH(nH, HH);

    vec3 res = vec3(0.0);
    vec3 shadow = vec3(0.0);
    vec3 unshadow = vec3(0.0);
    for(int i = 0; i < 3; ++i){
        for(int j = 0; j < 3; ++j){
            int id = i*3+j;
            res += HH[id] * SHCoeffs[id];
            shadow += prt[i][j] * SHCoeffs[id];
            unshadow += HC[id] * SHCoeffs[id];
        }
    }
    vec3 ratio = clamp(shadow/unshadow, 0.0, 1.0);
    res = ratio * res;

    return res;
}

vec3 evaluateLightingModelPRT(mat3 prt)
{
    vec3 res = vec3(0.0);
    for(int i = 0; i < 3; ++i){
        for(int j = 0; j < 3; ++j){
            res += prt[i][j] * SHCoeffs[i*3+j];
        }
    }

    return res;
}

void main()
{
    vec2 uv = VertexIn.Texcoord;
    vec3 nC = normalize(VertexIn.ModelNormal);
    vec3 nml = nC;
    mat3 prt = mat3(VertexIn.PRT1, VertexIn.PRT2, VertexIn.PRT3);

    if(hasAlbedoMap == uint(0))
        FragAlbedo = vec4(1.0);
    else
        FragAlbedo = texture(AlbedoMap, uv); //gammaCorrection(texture(AlbedoMap, uv), 1.0/2.2);

    if(hasNormalMap == uint(0))
    {
        if(analytic == uint(0))
            FragShading = vec4(evaluateLightingModelPRT(prt), 1.0f);
        else
            FragShading = vec4(evaluateLightingModel(nC), 1.0f);
    }
    else
    {
        vec3 n_tan = normalize(texture(NormalMap, uv).rgb*2.0 - vec3(1.0));

        mat3 TBN = mat3(normalize(VertexIn.Tangent), normalize(VertexIn.Bitangent), nC);
        vec3 nH = normalize(TBN * n_tan);

        if(analytic == uint(0))
            FragShading = vec4(evaluateLightingModelHybrid(nC, nH, prt), 1.0f);
        else
            FragShading = vec4(evaluateLightingModel(nH), 1.0f);

        nml = nH;
    }

    FragShading = gammaCorrection(FragShading, 2.2);
    FragColor = clamp(FragAlbedo * FragShading, 0.0, 1.0);
    FragNormal = vec4(0.5*(nml + vec3(1.0)), 1.0);
    FragPosition = vec4(VertexIn.Depth.xyz, 1.0);
    FragShading = vec4(clamp(0.5*FragShading.xyz, 0.0, 1.0), 1.0);
    // FragColor = gammaCorrection(clamp(FragAlbedo * FragShading, 0.0, 1.0), 2.2);
    // FragNormal = vec4(0.5*(nml + vec3(1.0)), 1.0);
    // FragPosition = vec4(VertexIn.Position, VertexIn.Depth.x);
    // FragShading = vec4(gammaCorrection(clamp(0.5*FragShading.xyz, 0.0, 1.0), 2.2), 1.0);
    // FragAlbedo = gammaCorrection(FragAlbedo, 2.2);
    FragPRT1 = vec4(VertexIn.PRT1, 1.0);
    FragPRT2 = vec4(VertexIn.PRT2, 1.0);
    // FragPRT3 = vec4(VertexIn.PRT3, 1.0);
    FragLabel = vec4(VertexIn.Label, 1.0);
}
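A NumPy sketch, not shipped in the repo, of the spherical-harmonics shading that evaluateH and evaluateLightingModel compute above for a single normal, given 9 RGB SH coefficients; the constants are the same ones hard-coded in the shader, and the sample coefficients below are made up.

import numpy as np

def evaluate_H(n):
    # same polynomial basis as evaluateH() in prt.fs
    c1, c2, c3, c4, c5 = 0.429043, 0.511664, 0.743125, 0.886227, 0.247708
    x, y, z = n
    return np.array([
        c4,
        2.0 * c2 * y, 2.0 * c2 * z, 2.0 * c2 * x,
        2.0 * c1 * x * y, 2.0 * c1 * y * z,
        c3 * z * z - c5,
        2.0 * c1 * z * x,
        c1 * (x * x - y * y),
    ])

def shade(normal, sh_coeffs):
    """normal: (3,), sh_coeffs: (9, 3) RGB SH coefficients."""
    H = evaluate_H(np.asarray(normal) / np.linalg.norm(normal))
    return H @ sh_coeffs          # (3,) RGB radiance, the same sum as evaluateLightingModel

sh = np.random.rand(9, 3) * 0.1   # hypothetical lighting coefficients
print(shade([0.0, 0.0, 1.0], sh))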