orhir commited on
Commit
184241a
·
verified ·
1 Parent(s): dbe4dc3

Upload 114 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. EdgeCape/VERSION +1 -0
  2. EdgeCape/__init__.py +3 -0
  3. EdgeCape/__pycache__/__init__.cpython-39.pyc +0 -0
  4. EdgeCape/apis/__init__.py +5 -0
  5. EdgeCape/apis/__pycache__/__init__.cpython-39.pyc +0 -0
  6. EdgeCape/apis/__pycache__/test.cpython-39.pyc +0 -0
  7. EdgeCape/apis/__pycache__/train.cpython-39.pyc +0 -0
  8. EdgeCape/apis/test.py +198 -0
  9. EdgeCape/apis/train.py +124 -0
  10. EdgeCape/core/__init__.py +1 -0
  11. EdgeCape/core/__pycache__/__init__.cpython-39.pyc +0 -0
  12. EdgeCape/core/custom_hooks/__pycache__/shuffle_hooks.cpython-39.pyc +0 -0
  13. EdgeCape/core/custom_hooks/shuffle_hooks.py +28 -0
  14. EdgeCape/datasets/__init__.py +3 -0
  15. EdgeCape/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  16. EdgeCape/datasets/__pycache__/builder.cpython-39.pyc +0 -0
  17. EdgeCape/datasets/builder.py +55 -0
  18. EdgeCape/datasets/datasets/__init__.py +6 -0
  19. EdgeCape/datasets/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  20. EdgeCape/datasets/datasets/mp100/__init__.py +13 -0
  21. EdgeCape/datasets/datasets/mp100/__pycache__/__init__.cpython-39.pyc +0 -0
  22. EdgeCape/datasets/datasets/mp100/__pycache__/custom_test_dataset.cpython-39.pyc +0 -0
  23. EdgeCape/datasets/datasets/mp100/__pycache__/fewshot_base_dataset.cpython-39.pyc +0 -0
  24. EdgeCape/datasets/datasets/mp100/__pycache__/fewshot_dataset.cpython-39.pyc +0 -0
  25. EdgeCape/datasets/datasets/mp100/__pycache__/test_base_dataset.cpython-39.pyc +0 -0
  26. EdgeCape/datasets/datasets/mp100/__pycache__/test_dataset.cpython-39.pyc +0 -0
  27. EdgeCape/datasets/datasets/mp100/__pycache__/transformer_base_dataset.cpython-39.pyc +0 -0
  28. EdgeCape/datasets/datasets/mp100/__pycache__/transformer_dataset.cpython-39.pyc +0 -0
  29. EdgeCape/datasets/datasets/mp100/custom_test_dataset.py +355 -0
  30. EdgeCape/datasets/datasets/mp100/fewshot_base_dataset.py +223 -0
  31. EdgeCape/datasets/datasets/mp100/fewshot_dataset.py +312 -0
  32. EdgeCape/datasets/datasets/mp100/test_base_dataset.py +226 -0
  33. EdgeCape/datasets/datasets/mp100/test_dataset.py +319 -0
  34. EdgeCape/datasets/datasets/mp100/transformer_base_dataset.py +209 -0
  35. EdgeCape/datasets/datasets/mp100/transformer_dataset.py +319 -0
  36. EdgeCape/datasets/pipelines/__init__.py +8 -0
  37. EdgeCape/datasets/pipelines/__pycache__/__init__.cpython-39.pyc +0 -0
  38. EdgeCape/datasets/pipelines/__pycache__/post_transforms.cpython-39.pyc +0 -0
  39. EdgeCape/datasets/pipelines/__pycache__/top_down_transform.cpython-39.pyc +0 -0
  40. EdgeCape/datasets/pipelines/post_transforms.py +121 -0
  41. EdgeCape/datasets/pipelines/top_down_transform.py +716 -0
  42. EdgeCape/models/__init__.py +3 -0
  43. EdgeCape/models/__pycache__/__init__.cpython-39.pyc +0 -0
  44. EdgeCape/models/backbones/__pycache__/adapter.cpython-39.pyc +0 -0
  45. EdgeCape/models/backbones/__pycache__/dino.cpython-39.pyc +0 -0
  46. EdgeCape/models/backbones/adapter.py +935 -0
  47. EdgeCape/models/backbones/dino.py +206 -0
  48. EdgeCape/models/detectors/EdgeCape.py +392 -0
  49. EdgeCape/models/detectors/__init__.py +3 -0
  50. EdgeCape/models/detectors/__pycache__/EdgeCape.cpython-39.pyc +0 -0
EdgeCape/VERSION ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.2.0
EdgeCape/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .core import * # noqa
2
+ from .datasets import * # noqa
3
+ from .models import * # noqa
EdgeCape/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (206 Bytes). View file
 
EdgeCape/apis/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from .train import train_model
2
+
3
+ __all__ = [
4
+ 'train_model'
5
+ ]
EdgeCape/apis/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (217 Bytes). View file
 
EdgeCape/apis/__pycache__/test.cpython-39.pyc ADDED
Binary file (5.14 kB). View file
 
EdgeCape/apis/__pycache__/train.cpython-39.pyc ADDED
Binary file (3.19 kB). View file
 
EdgeCape/apis/test.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import os.path as osp
3
+ import pickle
4
+ import shutil
5
+ import tempfile
6
+
7
+ import mmcv
8
+ import numpy as np
9
+ import torch
10
+ import torch.distributed as dist
11
+ from mmcv.runner import get_dist_info
12
+
13
+
14
+ def single_gpu_test(model, data_loader):
15
+ """Test model with a single gpu.
16
+
17
+ This method tests model with a single gpu and displays test progress bar.
18
+
19
+ Args:
20
+ model (nn.Module): Model to be tested.
21
+ data_loader (nn.Dataloader): Pytorch data loader.
22
+
23
+
24
+ Returns:
25
+ list: The prediction results.
26
+ """
27
+ model.eval()
28
+ results = []
29
+ dataset = data_loader.dataset
30
+ prog_bar = mmcv.ProgressBar(len(dataset))
31
+ for data in data_loader:
32
+ with torch.no_grad():
33
+ result = model(return_loss=False, **data)
34
+ batch_size = len(next(iter(data.values()))[0])
35
+ # results.append(result)
36
+ if 'preds' in result:
37
+ for i in range(batch_size):
38
+ results.append({
39
+ 'preds': result['preds'][i][None],
40
+ 'boxes': result['boxes'][i][None],
41
+ 'bbox_ids': [result['bbox_ids'][i]],
42
+ 'image_paths': [result['image_paths'][i]],
43
+ })
44
+ # use the first key as main key to calculate the batch size
45
+ # for _ in range(batch_size):
46
+ prog_bar.update(batch_size)
47
+ return results
48
+
49
+
50
+ def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
51
+ """Test model with multiple gpus.
52
+
53
+ This method tests model with multiple gpus and collects the results
54
+ under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
55
+ it encodes results to gpu tensors and use gpu communication for results
56
+ collection. On cpu mode it saves the results on different gpus to 'tmpdir'
57
+ and collects them by the rank 0 worker.
58
+
59
+ Args:
60
+ model (nn.Module): Model to be tested.
61
+ data_loader (nn.Dataloader): Pytorch data loader.
62
+ tmpdir (str): Path of directory to save the temporary results from
63
+ different gpus under cpu mode.
64
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
65
+
66
+ Returns:
67
+ list: The prediction results.
68
+ """
69
+ model.eval()
70
+ results = []
71
+ dataset = data_loader.dataset
72
+ rank, world_size = get_dist_info()
73
+ if rank == 0:
74
+ prog_bar = mmcv.ProgressBar(len(dataset))
75
+ for data in data_loader:
76
+ with torch.no_grad():
77
+ result = model(return_loss=False, **data)
78
+ results.append(result)
79
+
80
+ if rank == 0:
81
+ # use the first key as main key to calculate the batch size
82
+ batch_size = len(next(iter(data.values())))
83
+ for _ in range(batch_size * world_size):
84
+ prog_bar.update()
85
+
86
+ # collect results from all ranks
87
+ if gpu_collect:
88
+ results = collect_results_gpu(results, len(dataset))
89
+ else:
90
+ results = collect_results_cpu(results, len(dataset), tmpdir)
91
+ return results
92
+
93
+
94
+ def collect_results_cpu(result_part, size, tmpdir=None):
95
+ """Collect results in cpu mode.
96
+
97
+ It saves the results on different gpus to 'tmpdir' and collects
98
+ them by the rank 0 worker.
99
+
100
+ Args:
101
+ result_part (list): Results to be collected
102
+ size (int): Result size.
103
+ tmpdir (str): Path of directory to save the temporary results from
104
+ different gpus under cpu mode. Default: None
105
+
106
+ Returns:
107
+ list: Ordered results.
108
+ """
109
+ rank, world_size = get_dist_info()
110
+ # create a tmp dir if it is not specified
111
+ if tmpdir is None:
112
+ MAX_LEN = 512
113
+ # 32 is whitespace
114
+ dir_tensor = torch.full((MAX_LEN, ),
115
+ 32,
116
+ dtype=torch.uint8,
117
+ device='cuda')
118
+ if rank == 0:
119
+ mmcv.mkdir_or_exist('.dist_test')
120
+ tmpdir = tempfile.mkdtemp(dir='.dist_test')
121
+ tmpdir = torch.tensor(
122
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
123
+ dir_tensor[:len(tmpdir)] = tmpdir
124
+ dist.broadcast(dir_tensor, 0)
125
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
126
+ else:
127
+ mmcv.mkdir_or_exist(tmpdir)
128
+ # synchronizes all processes to make sure tmpdir exist
129
+ dist.barrier()
130
+ # dump the part result to the dir
131
+ mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
132
+ # synchronizes all processes for loading pickle file
133
+ dist.barrier()
134
+ # collect all parts
135
+ if rank != 0:
136
+ return None
137
+
138
+ # load results of all parts from tmp dir
139
+ part_list = []
140
+ for i in range(world_size):
141
+ part_file = osp.join(tmpdir, f'part_{i}.pkl')
142
+ part_list.append(mmcv.load(part_file))
143
+ # sort the results
144
+ ordered_results = []
145
+ for res in zip(*part_list):
146
+ ordered_results.extend(list(res))
147
+ # the dataloader may pad some samples
148
+ ordered_results = ordered_results[:size]
149
+ # remove tmp dir
150
+ shutil.rmtree(tmpdir)
151
+ return ordered_results
152
+
153
+
154
+ def collect_results_gpu(result_part, size):
155
+ """Collect results in gpu mode.
156
+
157
+ It encodes results to gpu tensors and use gpu communication for results
158
+ collection.
159
+
160
+ Args:
161
+ result_part (list): Results to be collected
162
+ size (int): Result size.
163
+
164
+ Returns:
165
+ list: Ordered results.
166
+ """
167
+
168
+ rank, world_size = get_dist_info()
169
+ # dump result part to tensor with pickle
170
+ part_tensor = torch.tensor(
171
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
172
+ # gather all result part tensor shape
173
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
174
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
175
+ dist.all_gather(shape_list, shape_tensor)
176
+ # padding result part tensor to max length
177
+ shape_max = torch.tensor(shape_list).max()
178
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
179
+ part_send[:shape_tensor[0]] = part_tensor
180
+ part_recv_list = [
181
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
182
+ ]
183
+ # gather all result part
184
+ dist.all_gather(part_recv_list, part_send)
185
+
186
+ if rank == 0:
187
+ part_list = []
188
+ for recv, shape in zip(part_recv_list, shape_list):
189
+ part_list.append(
190
+ pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
191
+ # sort the results
192
+ ordered_results = []
193
+ for res in zip(*part_list):
194
+ ordered_results.extend(list(res))
195
+ # the dataloader may pad some samples
196
+ ordered_results = ordered_results[:size]
197
+ return ordered_results
198
+ return None
EdgeCape/apis/train.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
4
+ from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook,
5
+ build_optimizer)
6
+
7
+ from mmpose.core import DistEvalHook, EvalHook, Fp16OptimizerHook
8
+ from mmpose.datasets import build_dataloader
9
+ from mmpose.utils import get_root_logger
10
+ from EdgeCape.core.custom_hooks.shuffle_hooks import ShufflePairedSamplesHook
11
+
12
+ def train_model(model,
13
+ dataset,
14
+ val_dataset,
15
+ cfg,
16
+ distributed=False,
17
+ validate=False,
18
+ timestamp=None,
19
+ meta=None):
20
+ """Train model entry function.
21
+
22
+ Args:
23
+ model (nn.Module): The model to be trained.
24
+ dataset (Dataset): Train dataset.
25
+ cfg (dict): The config dict for training.
26
+ distributed (bool): Whether to use distributed training.
27
+ Default: False.
28
+ validate (bool): Whether to do evaluation. Default: False.
29
+ timestamp (str | None): Local time for runner. Default: None.
30
+ meta (dict | None): Meta dict to record some important information.
31
+ Default: None
32
+ """
33
+ logger = get_root_logger(cfg.log_level)
34
+
35
+ # prepare data loaders
36
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
37
+ dataloader_setting = dict(
38
+ samples_per_gpu=cfg.data.get('samples_per_gpu', {}),
39
+ workers_per_gpu=cfg.data.get('workers_per_gpu', {}),
40
+ # cfg.gpus will be ignored if distributed
41
+ num_gpus=len(cfg.gpu_ids),
42
+ dist=distributed,
43
+ seed=cfg.seed,
44
+ pin_memory=False,
45
+ )
46
+ dataloader_setting = dict(dataloader_setting,
47
+ **cfg.data.get('train_dataloader', {}))
48
+
49
+ data_loaders = [
50
+ build_dataloader(ds, **dataloader_setting) for ds in dataset
51
+ ]
52
+
53
+ # put model on gpus
54
+ if distributed:
55
+ find_unused_parameters = cfg.get('find_unused_parameters', True) # NOTE: True has been modified to False for faster training.
56
+ # Sets the `find_unused_parameters` parameter in
57
+ # torch.nn.parallel.DistributedDataParallel
58
+ model = MMDistributedDataParallel(
59
+ model.cuda(),
60
+ device_ids=[torch.cuda.current_device()],
61
+ broadcast_buffers=False,
62
+ find_unused_parameters=find_unused_parameters)
63
+ else:
64
+ model = MMDataParallel(
65
+ model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
66
+
67
+ # build runner
68
+ optimizer = build_optimizer(model, cfg.optimizer)
69
+ runner = EpochBasedRunner(
70
+ model,
71
+ optimizer=optimizer,
72
+ work_dir=cfg.work_dir,
73
+ logger=logger,
74
+ meta=meta)
75
+ # an ugly workaround to make .log and .log.json filenames the same
76
+ runner.timestamp = timestamp
77
+
78
+ # fp16 setting
79
+ fp16_cfg = cfg.get('fp16', None)
80
+ if fp16_cfg is not None:
81
+ optimizer_config = Fp16OptimizerHook(
82
+ **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
83
+ elif distributed and 'type' not in cfg.optimizer_config:
84
+ optimizer_config = OptimizerHook(**cfg.optimizer_config)
85
+ else:
86
+ optimizer_config = cfg.optimizer_config
87
+
88
+ # register hooks
89
+ runner.register_training_hooks(cfg.lr_config, optimizer_config,
90
+ cfg.checkpoint_config, cfg.log_config,
91
+ cfg.get('momentum_config', None))
92
+ if distributed:
93
+ runner.register_hook(DistSamplerSeedHook())
94
+
95
+ shuffle_cfg = cfg.get('shuffle_cfg', None)
96
+ if shuffle_cfg is not None:
97
+ for data_loader in data_loaders:
98
+ runner.register_hook(ShufflePairedSamplesHook(data_loader, **shuffle_cfg))
99
+
100
+ # register eval hooks
101
+ if validate:
102
+ eval_cfg = cfg.get('evaluation', {})
103
+ eval_cfg['res_folder'] = os.path.join(cfg.work_dir, eval_cfg['res_folder'])
104
+ dataloader_setting = dict(
105
+ # samples_per_gpu=cfg.data.get('samples_per_gpu', {}),
106
+ samples_per_gpu=1,
107
+ workers_per_gpu=cfg.data.get('workers_per_gpu', {}),
108
+ # cfg.gpus will be ignored if distributed
109
+ num_gpus=len(cfg.gpu_ids),
110
+ dist=distributed,
111
+ shuffle=False,
112
+ pin_memory=False,
113
+ )
114
+ dataloader_setting = dict(dataloader_setting,
115
+ **cfg.data.get('val_dataloader', {}))
116
+ val_dataloader = build_dataloader(val_dataset, **dataloader_setting)
117
+ eval_hook = DistEvalHook if distributed else EvalHook
118
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
119
+
120
+ if cfg.resume_from:
121
+ runner.resume(cfg.resume_from)
122
+ elif cfg.load_from:
123
+ runner.load_checkpoint(cfg.load_from)
124
+ runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
EdgeCape/core/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
EdgeCape/core/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (152 Bytes). View file
 
EdgeCape/core/custom_hooks/__pycache__/shuffle_hooks.cpython-39.pyc ADDED
Binary file (1.27 kB). View file
 
EdgeCape/core/custom_hooks/shuffle_hooks.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmcv.runner import Hook
2
+ from torch.utils.data import DataLoader
3
+ from mmpose.utils import get_root_logger
4
+
5
+ class ShufflePairedSamplesHook(Hook):
6
+ """Non-Distributed ShufflePairedSamples.
7
+ After each training epoch, run FewShotKeypointDataset.random_paired_samples()
8
+ """
9
+
10
+ def __init__(self,
11
+ dataloader,
12
+ interval=1):
13
+ if not isinstance(dataloader, DataLoader):
14
+ raise TypeError(f'dataloader must be a pytorch DataLoader, '
15
+ f'but got {type(dataloader)}')
16
+
17
+ self.dataloader = dataloader
18
+ self.interval = interval
19
+ self.logger = get_root_logger()
20
+
21
+ def after_train_epoch(self, runner):
22
+ """Called after every training epoch to evaluate the results."""
23
+ if not self.every_n_epochs(runner, self.interval):
24
+ return
25
+ # self.logger.info("Run random_paired_samples()")
26
+ # self.logger.info(f"Before: {self.dataloader.dataset.paired_samples[0]}")
27
+ self.dataloader.dataset.random_paired_samples()
28
+ # self.logger.info(f"After: {self.dataloader.dataset.paired_samples[0]}")
EdgeCape/datasets/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .builder import * # noqa
2
+ from .datasets import * # noqa
3
+ from .pipelines import * # noqa
EdgeCape/datasets/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (221 Bytes). View file
 
EdgeCape/datasets/__pycache__/builder.cpython-39.pyc ADDED
Binary file (1.9 kB). View file
 
EdgeCape/datasets/builder.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmcv.utils import build_from_cfg
2
+ from torch.utils.data.dataset import ConcatDataset
3
+
4
+ from mmpose.datasets.dataset_wrappers import RepeatDataset
5
+ from mmpose.datasets.builder import DATASETS
6
+
7
+
8
+ def _concat_cfg(cfg):
9
+ replace = ['ann_file', 'img_prefix']
10
+ channels = ['num_joints', 'dataset_channel']
11
+ concat_cfg = []
12
+ for i in range(len(cfg['type'])):
13
+ cfg_tmp = cfg.deepcopy()
14
+ cfg_tmp['type'] = cfg['type'][i]
15
+ for item in replace:
16
+ assert item in cfg_tmp
17
+ assert len(cfg['type']) == len(cfg[item]), (cfg[item])
18
+ cfg_tmp[item] = cfg[item][i]
19
+ for item in channels:
20
+ assert item in cfg_tmp['data_cfg']
21
+ assert len(cfg['type']) == len(cfg['data_cfg'][item])
22
+ cfg_tmp['data_cfg'][item] = cfg['data_cfg'][item][i]
23
+ concat_cfg.append(cfg_tmp)
24
+ return concat_cfg
25
+
26
+
27
+ def _check_vaild(cfg):
28
+ replace = ['num_joints', 'dataset_channel']
29
+ if isinstance(cfg['data_cfg'][replace[0]], (list, tuple)):
30
+ for item in replace:
31
+ cfg['data_cfg'][item] = cfg['data_cfg'][item][0]
32
+ return cfg
33
+
34
+
35
+ def build_dataset(cfg, default_args=None):
36
+ """Build a dataset from config dict.
37
+
38
+ Args:
39
+ cfg (dict): Config dict. It should at least contain the key "type".
40
+ default_args (dict, optional): Default initialization arguments.
41
+ Default: None.
42
+
43
+ Returns:
44
+ Dataset: The constructed dataset.
45
+ """
46
+ if isinstance(cfg['type'], (list, tuple)): # In training, type=TransformerPoseDataset
47
+ dataset = ConcatDataset(
48
+ [build_dataset(c, default_args) for c in _concat_cfg(cfg)])
49
+ elif cfg['type'] == 'RepeatDataset':
50
+ dataset = RepeatDataset(
51
+ build_dataset(cfg['dataset'], default_args), cfg['times'])
52
+ else:
53
+ cfg = _check_vaild(cfg)
54
+ dataset = build_from_cfg(cfg, DATASETS, default_args)
55
+ return dataset
EdgeCape/datasets/datasets/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .mp100 import (FewShotKeypointDataset, FewShotBaseDataset,
2
+ TransformerBaseDataset, TransformerPoseDataset,)
3
+
4
+ __all__ = ['FewShotBaseDataset', 'FewShotKeypointDataset',
5
+ 'TransformerBaseDataset', 'TransformerPoseDataset',
6
+ ]
EdgeCape/datasets/datasets/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (353 Bytes). View file
 
EdgeCape/datasets/datasets/mp100/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .fewshot_dataset import FewShotKeypointDataset
2
+ from .fewshot_base_dataset import FewShotBaseDataset
3
+ from .transformer_dataset import TransformerPoseDataset
4
+ from .transformer_base_dataset import TransformerBaseDataset
5
+ from .test_base_dataset import TestBaseDataset
6
+ from .test_dataset import TestPoseDataset
7
+ from .custom_test_dataset import CustomTestPoseDataset
8
+
9
+ __all__ = [
10
+ 'FewShotKeypointDataset', 'FewShotBaseDataset',
11
+ 'TransformerPoseDataset', 'TransformerBaseDataset',
12
+ 'TestBaseDataset', 'TestPoseDataset', 'CustomTestPoseDataset'
13
+ ]
EdgeCape/datasets/datasets/mp100/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (663 Bytes). View file
 
EdgeCape/datasets/datasets/mp100/__pycache__/custom_test_dataset.cpython-39.pyc ADDED
Binary file (10.2 kB). View file
 
EdgeCape/datasets/datasets/mp100/__pycache__/fewshot_base_dataset.cpython-39.pyc ADDED
Binary file (7.22 kB). View file
 
EdgeCape/datasets/datasets/mp100/__pycache__/fewshot_dataset.cpython-39.pyc ADDED
Binary file (8.95 kB). View file
 
EdgeCape/datasets/datasets/mp100/__pycache__/test_base_dataset.cpython-39.pyc ADDED
Binary file (7.4 kB). View file
 
EdgeCape/datasets/datasets/mp100/__pycache__/test_dataset.cpython-39.pyc ADDED
Binary file (9.02 kB). View file
 
EdgeCape/datasets/datasets/mp100/__pycache__/transformer_base_dataset.cpython-39.pyc ADDED
Binary file (7.23 kB). View file
 
EdgeCape/datasets/datasets/mp100/__pycache__/transformer_dataset.cpython-39.pyc ADDED
Binary file (9.06 kB). View file
 
EdgeCape/datasets/datasets/mp100/custom_test_dataset.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmpose.datasets import DATASETS
2
+ import random
3
+ import numpy as np
4
+ import os
5
+ from collections import OrderedDict
6
+ from xtcocotools.coco import COCO
7
+ from .test_base_dataset import TestBaseDataset
8
+
9
+ @DATASETS.register_module()
10
+ class CustomTestPoseDataset(TestBaseDataset):
11
+
12
+ def __init__(self,
13
+ ann_file,
14
+ img_prefix,
15
+ data_cfg,
16
+ pipeline,
17
+ valid_class_ids,
18
+ max_kpt_num=None,
19
+ num_shots=1,
20
+ num_queries=100,
21
+ num_episodes=1,
22
+ pck_threshold_list=[0.05, 0.1, 0.15, 0.20, 0.25],
23
+ test_mode=True):
24
+ super().__init__(
25
+ ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode, PCK_threshold_list=pck_threshold_list)
26
+
27
+ self.ann_info['flip_pairs'] = []
28
+
29
+ self.ann_info['upper_body_ids'] = []
30
+ self.ann_info['lower_body_ids'] = []
31
+
32
+ self.ann_info['use_different_joint_weights'] = False
33
+ self.ann_info['joint_weights'] = np.array([1.,],
34
+ dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
35
+
36
+ self.coco = COCO(ann_file)
37
+
38
+ self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
39
+ self.img_ids = self.coco.getImgIds()
40
+
41
+ cat = None
42
+ relevant_names = [
43
+ '000000052046',
44
+ '000000052152'
45
+
46
+ # '000000027059',
47
+ # '000000030361'
48
+ # '000000027936',
49
+ # 'Pileated_Woodpecker_0004_180307', 'American_Three_Toed_Woodpecker_0019_179870'
50
+ # '000000016379', '000000008869'
51
+ # 'commonwarthog_115',
52
+ # 'commonwarthog_78'
53
+ # '000000027059', '000000030361', '000000027936'
54
+ # 'klipspringer_66', '000000008333', '000000026814', '000000047543', '000000052080', 'Common_Tern_0050_148928'
55
+ ]
56
+ if len(relevant_names) > 0:
57
+ if cat is not None:
58
+ relevant_names = [os.path.join(cat, name) for name in relevant_names]
59
+ self.img_ids = [img_id for img_id in self.img_ids if self.id2name[img_id] in relevant_names]
60
+ else:
61
+ new_ids = []
62
+ for relevant_name in relevant_names:
63
+ new_ids += [img_id for img_id in self.img_ids if relevant_name in self.id2name[img_id]]
64
+ self.img_ids = new_ids
65
+ else:
66
+ self.img_ids = [img_id for img_id in self.img_ids if cat == self.id2name[img_id].split('/')[0]]
67
+
68
+ self.classes = [
69
+ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
70
+ ]
71
+
72
+ self.num_classes = len(self.classes)
73
+ self._class_to_ind = dict(zip(self.classes, self.coco.getCatIds()))
74
+ self._ind_to_class = dict(zip(self.coco.getCatIds(), self.classes))
75
+
76
+ if valid_class_ids is not None: # None by default
77
+ self.valid_class_ids = valid_class_ids
78
+ else:
79
+ self.valid_class_ids = self.coco.getCatIds()
80
+
81
+ self.valid_classes = [self._ind_to_class[ind] for ind in self.valid_class_ids]
82
+ self.cats = self.coco.cats
83
+ self.max_kpt_num = max_kpt_num
84
+
85
+ # Also update self.cat2obj
86
+ self.db = self._get_db()
87
+
88
+ self.num_shots = num_shots
89
+
90
+ if not test_mode:
91
+ # Update every training epoch
92
+ self.random_paired_samples()
93
+ else:
94
+ self.num_queries = num_queries
95
+ self.num_episodes = num_episodes
96
+ self.make_paired_samples()
97
+
98
+
99
+ def random_paired_samples(self):
100
+ num_datas = [len(self.cat2obj[self._class_to_ind[cls]]) for cls in self.valid_classes]
101
+
102
+ # balance the dataset
103
+ max_num_data = max(num_datas)
104
+
105
+ all_samples = []
106
+ for cls in self.valid_class_ids:
107
+ for i in range(max_num_data):
108
+ shot = random.sample(self.cat2obj[cls], self.num_shots + 1)
109
+ all_samples.append(shot)
110
+
111
+ self.paired_samples = np.array(all_samples)
112
+ np.random.shuffle(self.paired_samples)
113
+
114
+ def make_paired_samples(self):
115
+ random.seed(1)
116
+ np.random.seed(0)
117
+ all_samples = []
118
+ self.num_episodes = 1000
119
+ for cls in self.valid_class_ids:
120
+ for _ in range(self.num_episodes):
121
+ if self.cat2obj[cls] == []:
122
+ continue
123
+ self.num_queries = 1
124
+ self.num_shots = 1
125
+ if len(self.cat2obj[cls]) < self.num_shots + self.num_queries:
126
+ shots = random.choices(self.cat2obj[cls], k=self.num_shots + self.num_queries)
127
+ else:
128
+ shots = random.sample(self.cat2obj[cls], self.num_shots + self.num_queries)
129
+ sample_ids = shots[:self.num_shots]
130
+ query_ids = shots[self.num_shots:]
131
+ for query_id in query_ids:
132
+ all_samples.append(sample_ids + [query_id])
133
+ all_samples.append([query_id] + [query_id])
134
+
135
+ self.paired_samples = np.array(list(set(tuple(x) for x in all_samples)))
136
+
137
+ def _select_kpt(self, obj, kpt_id):
138
+ obj['joints_3d'] = obj['joints_3d'][kpt_id:kpt_id+1]
139
+ obj['joints_3d_visible'] = obj['joints_3d_visible'][kpt_id:kpt_id+1]
140
+ obj['kpt_id'] = kpt_id
141
+
142
+ return obj
143
+
144
+ @staticmethod
145
+ def _get_mapping_id_name(imgs):
146
+ """
147
+ Args:
148
+ imgs (dict): dict of image info.
149
+
150
+ Returns:
151
+ tuple: Image name & id mapping dicts.
152
+
153
+ - id2name (dict): Mapping image id to name.
154
+ - name2id (dict): Mapping image name to id.
155
+ """
156
+ id2name = {}
157
+ name2id = {}
158
+ for image_id, image in imgs.items():
159
+ file_name = image['file_name']
160
+ id2name[image_id] = file_name
161
+ name2id[file_name] = image_id
162
+
163
+ return id2name, name2id
164
+
165
+ def _get_db(self):
166
+ """Ground truth bbox and keypoints."""
167
+ self.obj_id = 0
168
+
169
+ self.cat2obj = {}
170
+ for i in self.coco.getCatIds():
171
+ self.cat2obj.update({i: []})
172
+
173
+ gt_db = []
174
+ for img_id in self.img_ids:
175
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
176
+ return gt_db
177
+
178
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
179
+ """load annotation from COCOAPI.
180
+
181
+ Note:
182
+ bbox:[x1, y1, w, h]
183
+ Args:
184
+ img_id: coco image id
185
+ Returns:
186
+ dict: db entry
187
+ """
188
+ img_ann = self.coco.loadImgs(img_id)[0]
189
+ width = img_ann['width']
190
+ height = img_ann['height']
191
+
192
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
193
+ objs = self.coco.loadAnns(ann_ids)
194
+
195
+ # sanitize bboxes
196
+ valid_objs = []
197
+ for obj in objs:
198
+ if 'bbox' not in obj:
199
+ continue
200
+ x, y, w, h = obj['bbox']
201
+ x1 = max(0, x)
202
+ y1 = max(0, y)
203
+ x2 = min(width - 1, x1 + max(0, w - 1))
204
+ y2 = min(height - 1, y1 + max(0, h - 1))
205
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
206
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
207
+ valid_objs.append(obj)
208
+ objs = valid_objs
209
+
210
+ bbox_id = 0
211
+ rec = []
212
+ for obj in objs:
213
+ if 'keypoints' not in obj:
214
+ continue
215
+ if max(obj['keypoints']) == 0:
216
+ continue
217
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
218
+ continue
219
+
220
+ category_id = obj['category_id']
221
+ # the number of keypoint for this specific category
222
+ cat_kpt_num = int(len(obj['keypoints']) / 3)
223
+ if self.max_kpt_num is None:
224
+ kpt_num = cat_kpt_num
225
+ else:
226
+ kpt_num = self.max_kpt_num
227
+
228
+ joints_3d = np.zeros((kpt_num, 3), dtype=np.float32)
229
+ joints_3d_visible = np.zeros((kpt_num, 3), dtype=np.float32)
230
+
231
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
232
+ joints_3d[:cat_kpt_num, :2] = keypoints[:, :2]
233
+ joints_3d_visible[:cat_kpt_num, :2] = np.minimum(1, keypoints[:, 2:3])
234
+
235
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
236
+
237
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
238
+
239
+ self.cat2obj[category_id].append(self.obj_id)
240
+
241
+ rec.append({
242
+ 'image_file': image_file,
243
+ 'center': center,
244
+ 'scale': scale,
245
+ 'rotation': 0,
246
+ 'bbox': obj['clean_bbox'][:4],
247
+ 'bbox_score': 1,
248
+ 'joints_3d': joints_3d,
249
+ 'joints_3d_visible': joints_3d_visible,
250
+ 'category_id': category_id,
251
+ 'cat_kpt_num': cat_kpt_num,
252
+ 'bbox_id': self.obj_id,
253
+ 'skeleton': self.coco.cats[obj['category_id']]['skeleton'],
254
+ })
255
+ bbox_id = bbox_id + 1
256
+ self.obj_id += 1
257
+
258
+ return rec
259
+
260
+ def _xywh2cs(self, x, y, w, h):
261
+ """This encodes bbox(x,y,w,w) into (center, scale)
262
+
263
+ Args:
264
+ x, y, w, h
265
+
266
+ Returns:
267
+ tuple: A tuple containing center and scale.
268
+
269
+ - center (np.ndarray[float32](2,)): center of the bbox (x, y).
270
+ - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
271
+ """
272
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info['image_size'][1]
273
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
274
+ #
275
+ # if (not self.test_mode) and np.random.rand() < 0.3:
276
+ # center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
277
+
278
+ if w > aspect_ratio * h:
279
+ h = w * 1.0 / aspect_ratio
280
+ elif w < aspect_ratio * h:
281
+ w = h * aspect_ratio
282
+
283
+ # pixel std is 200.0
284
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
285
+ # padding to include proper amount of context
286
+ scale = scale * 1.25
287
+
288
+ return center, scale
289
+
290
+ def evaluate(self, outputs, res_folder, metric='PCK', **kwargs):
291
+ """Evaluate interhand2d keypoint results. The pose prediction results
292
+ will be saved in `${res_folder}/result_keypoints.json`.
293
+
294
+ Note:
295
+ batch_size: N
296
+ num_keypoints: K
297
+ heatmap height: H
298
+ heatmap width: W
299
+
300
+ Args:
301
+ outputs (list(preds, boxes, image_path, output_heatmap))
302
+ :preds (np.ndarray[N,K,3]): The first two dimensions are
303
+ coordinates, score is the third dimension of the array.
304
+ :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
305
+ , scale[1],area, score]
306
+ :image_paths (list[str]): For example, ['C', 'a', 'p', 't',
307
+ 'u', 'r', 'e', '1', '2', '/', '0', '3', '9', '0', '_',
308
+ 'd', 'h', '_', 't', 'o', 'u', 'c', 'h', 'R', 'O', 'M',
309
+ '/', 'c', 'a', 'm', '4', '1', '0', '2', '0', '9', '/',
310
+ 'i', 'm', 'a', 'g', 'e', '6', '2', '4', '3', '4', '.',
311
+ 'j', 'p', 'g']
312
+ :output_heatmap (np.ndarray[N, K, H, W]): model outpus.
313
+
314
+ res_folder (str): Path of directory to save the results.
315
+ metric (str | list[str]): Metric to be performed.
316
+ Options: 'PCK', 'AUC', 'EPE'.
317
+
318
+ Returns:
319
+ dict: Evaluation results for evaluation metric.
320
+ """
321
+ metrics = metric if isinstance(metric, list) else [metric]
322
+ allowed_metrics = ['PCK', 'AUC', 'EPE', 'NME']
323
+ for metric in metrics:
324
+ if metric not in allowed_metrics:
325
+ raise KeyError(f'metric {metric} is not supported')
326
+
327
+ res_file = os.path.join(res_folder, 'result_keypoints.json')
328
+
329
+ kpts = []
330
+ for output in outputs:
331
+ preds = output['preds']
332
+ boxes = output['boxes']
333
+ image_paths = output['image_paths']
334
+ bbox_ids = output['bbox_ids']
335
+
336
+ batch_size = len(image_paths)
337
+ for i in range(batch_size):
338
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
339
+
340
+ kpts.append({
341
+ 'keypoints': preds[i].tolist(),
342
+ 'center': boxes[i][0:2].tolist(),
343
+ 'scale': boxes[i][2:4].tolist(),
344
+ 'area': float(boxes[i][4]),
345
+ 'score': float(boxes[i][5]),
346
+ 'image_id': image_id,
347
+ 'bbox_id': bbox_ids[i]
348
+ })
349
+ kpts = self._sort_and_unique_bboxes(kpts)
350
+
351
+ self._write_keypoint_results(kpts, res_file)
352
+ info_str = self._report_metric(res_file, metrics)
353
+ name_value = OrderedDict(info_str)
354
+
355
+ return name_value
EdgeCape/datasets/datasets/mp100/fewshot_base_dataset.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ from abc import ABCMeta, abstractmethod
3
+ import json_tricks as json
4
+ import numpy as np
5
+
6
+ from mmcv.parallel import DataContainer as DC
7
+ from mmpose.core.evaluation.top_down_eval import (keypoint_auc, keypoint_epe,
8
+ keypoint_pck_accuracy)
9
+ from torch.utils.data import Dataset
10
+ from mmpose.datasets import DATASETS
11
+ from mmpose.datasets.pipelines import Compose
12
+
13
+ @DATASETS.register_module()
14
+ class FewShotBaseDataset(Dataset, metaclass=ABCMeta):
15
+
16
+ def __init__(self,
17
+ ann_file,
18
+ img_prefix,
19
+ data_cfg,
20
+ pipeline,
21
+ test_mode=False):
22
+ self.image_info = {}
23
+ self.ann_info = {}
24
+
25
+ self.annotations_path = ann_file
26
+ if not img_prefix.endswith('/'):
27
+ img_prefix = img_prefix + '/'
28
+ self.img_prefix = img_prefix
29
+ self.pipeline = pipeline
30
+ self.test_mode = test_mode
31
+
32
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
33
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
34
+ self.ann_info['num_joints'] = data_cfg['num_joints']
35
+
36
+ self.ann_info['flip_pairs'] = None
37
+
38
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
39
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
40
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
41
+
42
+ self.db = []
43
+ self.num_shots = 1
44
+ self.paired_samples = []
45
+ self.pipeline = Compose(self.pipeline)
46
+
47
+ @abstractmethod
48
+ def _get_db(self):
49
+ """Load dataset."""
50
+ raise NotImplementedError
51
+
52
+ @abstractmethod
53
+ def _select_kpt(self, obj, kpt_id):
54
+ """Select kpt."""
55
+ raise NotImplementedError
56
+
57
+ @abstractmethod
58
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
59
+ """Evaluate keypoint results."""
60
+ raise NotImplementedError
61
+
62
+ @staticmethod
63
+ def _write_keypoint_results(keypoints, res_file):
64
+ """Write results into a json file."""
65
+
66
+ with open(res_file, 'w') as f:
67
+ json.dump(keypoints, f, sort_keys=True, indent=4)
68
+
69
+ def _report_metric(self,
70
+ res_file,
71
+ metrics,
72
+ pck_thr=0.2,
73
+ pckh_thr=0.7,
74
+ auc_nor=30):
75
+ """Keypoint evaluation.
76
+
77
+ Args:
78
+ res_file (str): Json file stored prediction results.
79
+ metrics (str | list[str]): Metric to be performed.
80
+ Options: 'PCK', 'PCKh', 'AUC', 'EPE'.
81
+ pck_thr (float): PCK threshold, default as 0.2.
82
+ pckh_thr (float): PCKh threshold, default as 0.7.
83
+ auc_nor (float): AUC normalization factor, default as 30 pixel.
84
+
85
+ Returns:
86
+ List: Evaluation results for evaluation metric.
87
+ """
88
+ info_str = []
89
+
90
+ with open(res_file, 'r') as fin:
91
+ preds = json.load(fin)
92
+ assert len(preds) == len(self.paired_samples)
93
+
94
+ outputs = []
95
+ gts = []
96
+ masks = []
97
+ threshold_bbox = []
98
+ threshold_head_box = []
99
+
100
+ for pred, pair in zip(preds, self.paired_samples):
101
+ item = self.db[pair[-1]]
102
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
103
+ gts.append(np.array(item['joints_3d'])[:, :-1])
104
+
105
+ mask_query = ((np.array(item['joints_3d_visible'])[:, 0]) > 0)
106
+ mask_sample = ((np.array(self.db[pair[0]]['joints_3d_visible'])[:, 0]) > 0)
107
+ for id_s in pair[:-1]:
108
+ mask_sample = np.bitwise_and(mask_sample, ((np.array(self.db[id_s]['joints_3d_visible'])[:, 0]) > 0))
109
+ masks.append(np.bitwise_and(mask_query, mask_sample))
110
+
111
+ if 'PCK' in metrics:
112
+ bbox = np.array(item['bbox'])
113
+ bbox_thr = np.max(bbox[2:])
114
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
115
+ if 'PCKh' in metrics:
116
+ head_box_thr = item['head_size']
117
+ threshold_head_box.append(
118
+ np.array([head_box_thr, head_box_thr]))
119
+
120
+ if 'PCK' in metrics:
121
+ pck_avg = []
122
+ for (output, gt, mask, thr_bbox) in zip(outputs, gts, masks, threshold_bbox):
123
+ _, pck, _ = keypoint_pck_accuracy(np.expand_dims(output, 0), np.expand_dims(gt,0), np.expand_dims(mask,0), pck_thr, np.expand_dims(thr_bbox,0))
124
+ pck_avg.append(pck)
125
+ info_str.append(('PCK', np.mean(pck_avg)))
126
+
127
+ return info_str
128
+
129
+ def _merge_obj(self, Xs_list, Xq, idx):
130
+ """ merge Xs_list and Xq.
131
+
132
+ :param Xs_list: N-shot samples X
133
+ :param Xq: query X
134
+ :param idx: id of paired_samples
135
+ :return: Xall
136
+ """
137
+ Xall = dict()
138
+ Xall['img_s'] = [Xs['img'] for Xs in Xs_list]
139
+ Xall['target_s'] = [Xs['target'] for Xs in Xs_list]
140
+ Xall['target_weight_s'] = [Xs['target_weight'] for Xs in Xs_list]
141
+ xs_img_metas = [Xs['img_metas'].data for Xs in Xs_list]
142
+
143
+ Xall['img_q'] = Xq['img']
144
+ Xall['target_q'] = Xq['target']
145
+ Xall['target_weight_q'] = Xq['target_weight']
146
+ xq_img_metas = Xq['img_metas'].data
147
+
148
+ img_metas = dict()
149
+ for key in xq_img_metas.keys():
150
+ img_metas['sample_' + key] = [xs_img_meta[key] for xs_img_meta in xs_img_metas]
151
+ img_metas['query_' + key] = xq_img_metas[key]
152
+ img_metas['bbox_id'] = idx
153
+
154
+ Xall['img_metas'] = DC(img_metas, cpu_only=True)
155
+
156
+ return Xall
157
+
158
+ def __len__(self):
159
+ """Get the size of the dataset."""
160
+ return len(self.paired_samples)
161
+
162
+ def __getitem__(self, idx):
163
+ """Get the sample given index."""
164
+
165
+ pair_ids = self.paired_samples[idx]
166
+ assert len(pair_ids) == self.num_shots + 1
167
+ sample_id_list = pair_ids[:self.num_shots]
168
+ query_id = pair_ids[-1]
169
+
170
+ sample_obj_list = []
171
+ for sample_id in sample_id_list:
172
+ sample_obj = copy.deepcopy(self.db[sample_id])
173
+ sample_obj['ann_info'] = copy.deepcopy(self.ann_info)
174
+ sample_obj_list.append(sample_obj)
175
+
176
+ query_obj = copy.deepcopy(self.db[query_id])
177
+ query_obj['ann_info'] = copy.deepcopy(self.ann_info)
178
+
179
+ if not self.test_mode:
180
+ # randomly select "one" keypoint
181
+ sample_valid = (sample_obj_list[0]['joints_3d_visible'][:, 0] > 0)
182
+ for sample_obj in sample_obj_list:
183
+ sample_valid = sample_valid & (sample_obj['joints_3d_visible'][:, 0] > 0)
184
+ query_valid = (query_obj['joints_3d_visible'][:, 0] > 0)
185
+
186
+ valid_s = np.where(sample_valid)[0]
187
+ valid_q = np.where(query_valid)[0]
188
+ valid_sq = np.where(sample_valid & query_valid)[0]
189
+ if len(valid_sq) > 0:
190
+ kpt_id = np.random.choice(valid_sq)
191
+ elif len(valid_s) > 0:
192
+ kpt_id = np.random.choice(valid_s)
193
+ elif len(valid_q) > 0:
194
+ kpt_id = np.random.choice(valid_q)
195
+ else:
196
+ kpt_id = np.random.choice(np.array(range(len(query_valid))))
197
+
198
+ for i in range(self.num_shots):
199
+ sample_obj_list[i] = self._select_kpt(sample_obj_list[i], kpt_id)
200
+ query_obj = self._select_kpt(query_obj, kpt_id)
201
+
202
+ # when test, all keypoints will be preserved.
203
+
204
+ Xs_list = []
205
+ for sample_obj in sample_obj_list:
206
+ Xs = self.pipeline(sample_obj)
207
+ Xs_list.append(Xs)
208
+ Xq = self.pipeline(query_obj)
209
+
210
+ Xall = self._merge_obj(Xs_list, Xq, idx)
211
+ Xall['skeleton'] = self.db[query_id]['skeleton']
212
+
213
+ return Xall
214
+
215
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
216
+ """sort kpts and remove the repeated ones."""
217
+ kpts = sorted(kpts, key=lambda x: x[key])
218
+ num = len(kpts)
219
+ for i in range(num - 1, 0, -1):
220
+ if kpts[i][key] == kpts[i - 1][key]:
221
+ del kpts[i]
222
+
223
+ return kpts
EdgeCape/datasets/datasets/mp100/fewshot_dataset.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmpose.datasets import DATASETS
2
+ import random
3
+ import numpy as np
4
+ import os
5
+ from collections import OrderedDict
6
+ from xtcocotools.coco import COCO
7
+ from .fewshot_base_dataset import FewShotBaseDataset
8
+
9
+ @DATASETS.register_module()
10
+ class FewShotKeypointDataset(FewShotBaseDataset):
11
+
12
+ def __init__(self,
13
+ ann_file,
14
+ img_prefix,
15
+ data_cfg,
16
+ pipeline,
17
+ valid_class_ids,
18
+ num_shots = 1,
19
+ num_queries = 100,
20
+ num_episodes = 1,
21
+ test_mode=False):
22
+ super().__init__(
23
+ ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)
24
+
25
+ self.ann_info['flip_pairs'] = []
26
+
27
+ self.ann_info['upper_body_ids'] = []
28
+ self.ann_info['lower_body_ids'] = []
29
+
30
+ self.ann_info['use_different_joint_weights'] = False
31
+ self.ann_info['joint_weights'] = np.array([1.,],
32
+ dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
33
+
34
+ self.coco = COCO(ann_file)
35
+
36
+ self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
37
+ self.img_ids = self.coco.getImgIds()
38
+ self.classes = [
39
+ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
40
+ ]
41
+
42
+ self.num_classes = len(self.classes)
43
+ self._class_to_ind = dict(zip(self.classes, self.coco.getCatIds()))
44
+ self._ind_to_class = dict(zip(self.coco.getCatIds(), self.classes))
45
+
46
+ if valid_class_ids is not None:
47
+ self.valid_class_ids = valid_class_ids
48
+ else:
49
+ self.valid_class_ids = self.coco.getCatIds()
50
+ self.valid_classes = [self._ind_to_class[ind] for ind in self.valid_class_ids]
51
+
52
+ self.cats = self.coco.cats
53
+
54
+ # Also update self.cat2obj
55
+ self.db = self._get_db()
56
+
57
+ self.num_shots = num_shots
58
+
59
+ if not test_mode:
60
+ # Update every training epoch
61
+ self.random_paired_samples()
62
+ else:
63
+ self.num_queries = num_queries
64
+ self.num_episodes = num_episodes
65
+ self.make_paired_samples()
66
+
67
+
68
+ def random_paired_samples(self):
69
+ num_datas = [len(self.cat2obj[self._class_to_ind[cls]]) for cls in self.valid_classes]
70
+
71
+ # balance the dataset
72
+ max_num_data = max(num_datas)
73
+
74
+ all_samples = []
75
+ for cls in self.valid_class_ids:
76
+ for i in range(max_num_data):
77
+ shot = random.sample(self.cat2obj[cls], self.num_shots + 1)
78
+ all_samples.append(shot)
79
+
80
+ self.paired_samples = np.array(all_samples)
81
+ np.random.shuffle(self.paired_samples)
82
+
83
+ def make_paired_samples(self):
84
+ random.seed(1)
85
+ np.random.seed(0)
86
+
87
+ all_samples = []
88
+ for cls in self.valid_class_ids:
89
+ for _ in range(self.num_episodes):
90
+ shots = random.sample(self.cat2obj[cls], self.num_shots + self.num_queries)
91
+ sample_ids = shots[:self.num_shots]
92
+ query_ids = shots[self.num_shots:]
93
+ for query_id in query_ids:
94
+ all_samples.append(sample_ids + [query_id])
95
+
96
+ self.paired_samples = np.array(all_samples)
97
+
98
+ def _select_kpt(self, obj, kpt_id):
99
+ obj['joints_3d'] = obj['joints_3d'][kpt_id:kpt_id+1]
100
+ obj['joints_3d_visible'] = obj['joints_3d_visible'][kpt_id:kpt_id+1]
101
+ obj['kpt_id'] = kpt_id
102
+
103
+ return obj
104
+
105
+ @staticmethod
106
+ def _get_mapping_id_name(imgs):
107
+ """
108
+ Args:
109
+ imgs (dict): dict of image info.
110
+
111
+ Returns:
112
+ tuple: Image name & id mapping dicts.
113
+
114
+ - id2name (dict): Mapping image id to name.
115
+ - name2id (dict): Mapping image name to id.
116
+ """
117
+ id2name = {}
118
+ name2id = {}
119
+ for image_id, image in imgs.items():
120
+ file_name = image['file_name']
121
+ id2name[image_id] = file_name
122
+ name2id[file_name] = image_id
123
+
124
+ return id2name, name2id
125
+
126
+ def _get_db(self):
127
+ """Ground truth bbox and keypoints."""
128
+ self.obj_id = 0
129
+
130
+ self.cat2obj = {}
131
+ for i in self.coco.getCatIds():
132
+ self.cat2obj.update({i: []})
133
+
134
+ gt_db = []
135
+ for img_id in self.img_ids:
136
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
137
+ return gt_db
138
+
139
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
140
+ """load annotation from COCOAPI.
141
+
142
+ Note:
143
+ bbox:[x1, y1, w, h]
144
+ Args:
145
+ img_id: coco image id
146
+ Returns:
147
+ dict: db entry
148
+ """
149
+ img_ann = self.coco.loadImgs(img_id)[0]
150
+ width = img_ann['width']
151
+ height = img_ann['height']
152
+
153
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
154
+ objs = self.coco.loadAnns(ann_ids)
155
+
156
+ # sanitize bboxes
157
+ valid_objs = []
158
+ for obj in objs:
159
+ if 'bbox' not in obj:
160
+ continue
161
+ x, y, w, h = obj['bbox']
162
+ x1 = max(0, x)
163
+ y1 = max(0, y)
164
+ x2 = min(width - 1, x1 + max(0, w - 1))
165
+ y2 = min(height - 1, y1 + max(0, h - 1))
166
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
167
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
168
+ valid_objs.append(obj)
169
+ objs = valid_objs
170
+
171
+ bbox_id = 0
172
+ rec = []
173
+ for obj in objs:
174
+ if 'keypoints' not in obj:
175
+ continue
176
+ if max(obj['keypoints']) == 0:
177
+ continue
178
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
179
+ continue
180
+
181
+ category_id = obj['category_id']
182
+ # the number of keypoint for this specific category
183
+ cat_kpt_num = int(len(obj['keypoints']) / 3)
184
+
185
+ joints_3d = np.zeros((cat_kpt_num, 3), dtype=np.float32)
186
+ joints_3d_visible = np.zeros((cat_kpt_num, 3), dtype=np.float32)
187
+
188
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
189
+ joints_3d[:, :2] = keypoints[:, :2]
190
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
191
+
192
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
193
+
194
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
195
+
196
+ self.cat2obj[category_id].append(self.obj_id)
197
+
198
+ rec.append({
199
+ 'image_file': image_file,
200
+ 'center': center,
201
+ 'scale': scale,
202
+ 'rotation': 0,
203
+ 'bbox': obj['clean_bbox'][:4],
204
+ 'bbox_score': 1,
205
+ 'joints_3d': joints_3d,
206
+ 'joints_3d_visible': joints_3d_visible,
207
+ 'category_id': category_id,
208
+ 'cat_kpt_num': cat_kpt_num,
209
+ 'bbox_id': self.obj_id,
210
+ 'skeleton': self.coco.cats[obj['category_id']]['skeleton'],
211
+ })
212
+ bbox_id = bbox_id + 1
213
+ self.obj_id += 1
214
+
215
+ return rec
216
+
217
+ def _xywh2cs(self, x, y, w, h):
218
+ """This encodes bbox(x,y,w,w) into (center, scale)
219
+
220
+ Args:
221
+ x, y, w, h
222
+
223
+ Returns:
224
+ tuple: A tuple containing center and scale.
225
+
226
+ - center (np.ndarray[float32](2,)): center of the bbox (x, y).
227
+ - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
228
+ """
229
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info['image_size'][1]
230
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
231
+ #
232
+ # if (not self.test_mode) and np.random.rand() < 0.3:
233
+ # center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
234
+
235
+ if w > aspect_ratio * h:
236
+ h = w * 1.0 / aspect_ratio
237
+ elif w < aspect_ratio * h:
238
+ w = h * aspect_ratio
239
+
240
+ # pixel std is 200.0
241
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
242
+ # padding to include proper amount of context
243
+ scale = scale * 1.25
244
+
245
+ return center, scale
246
+
247
+ def evaluate(self, outputs, res_folder, metric='PCK', **kwargs):
248
+ """Evaluate interhand2d keypoint results. The pose prediction results
249
+ will be saved in `${res_folder}/result_keypoints.json`.
250
+
251
+ Note:
252
+ batch_size: N
253
+ num_keypoints: K
254
+ heatmap height: H
255
+ heatmap width: W
256
+
257
+ Args:
258
+ outputs (list(preds, boxes, image_path, output_heatmap))
259
+ :preds (np.ndarray[N,K,3]): The first two dimensions are
260
+ coordinates, score is the third dimension of the array.
261
+ :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
262
+ , scale[1],area, score]
263
+ :image_paths (list[str]): For example, ['C', 'a', 'p', 't',
264
+ 'u', 'r', 'e', '1', '2', '/', '0', '3', '9', '0', '_',
265
+ 'd', 'h', '_', 't', 'o', 'u', 'c', 'h', 'R', 'O', 'M',
266
+ '/', 'c', 'a', 'm', '4', '1', '0', '2', '0', '9', '/',
267
+ 'i', 'm', 'a', 'g', 'e', '6', '2', '4', '3', '4', '.',
268
+ 'j', 'p', 'g']
269
+ :output_heatmap (np.ndarray[N, K, H, W]): model outpus.
270
+
271
+ res_folder (str): Path of directory to save the results.
272
+ metric (str | list[str]): Metric to be performed.
273
+ Options: 'PCK', 'AUC', 'EPE'.
274
+
275
+ Returns:
276
+ dict: Evaluation results for evaluation metric.
277
+ """
278
+ metrics = metric if isinstance(metric, list) else [metric]
279
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
280
+ for metric in metrics:
281
+ if metric not in allowed_metrics:
282
+ raise KeyError(f'metric {metric} is not supported')
283
+
284
+ res_file = os.path.join(res_folder, 'result_keypoints.json')
285
+
286
+ kpts = []
287
+ for output in outputs:
288
+ preds = output['preds']
289
+ boxes = output['boxes']
290
+ image_paths = output['image_paths']
291
+ bbox_ids = output['bbox_ids']
292
+
293
+ batch_size = len(image_paths)
294
+ for i in range(batch_size):
295
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
296
+
297
+ kpts.append({
298
+ 'keypoints': preds[i].tolist(),
299
+ 'center': boxes[i][0:2].tolist(),
300
+ 'scale': boxes[i][2:4].tolist(),
301
+ 'area': float(boxes[i][4]),
302
+ 'score': float(boxes[i][5]),
303
+ 'image_id': image_id,
304
+ 'bbox_id': bbox_ids[i]
305
+ })
306
+ kpts = self._sort_and_unique_bboxes(kpts)
307
+
308
+ self._write_keypoint_results(kpts, res_file)
309
+ info_str = self._report_metric(res_file, metrics)
310
+ name_value = OrderedDict(info_str)
311
+
312
+ return name_value
EdgeCape/datasets/datasets/mp100/test_base_dataset.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ from abc import ABCMeta, abstractmethod
3
+ import json_tricks as json
4
+ import numpy as np
5
+
6
+ from mmcv.parallel import DataContainer as DC
7
+ from mmpose.core.evaluation.top_down_eval import (keypoint_auc, keypoint_epe, keypoint_nme,
8
+ keypoint_pck_accuracy)
9
+ from torch.utils.data import Dataset
10
+ from mmpose.datasets import DATASETS
11
+ from mmpose.datasets.pipelines import Compose
12
+
13
+ @DATASETS.register_module()
14
+ class TestBaseDataset(Dataset, metaclass=ABCMeta):
15
+
16
+ def __init__(self,
17
+ ann_file,
18
+ img_prefix,
19
+ data_cfg,
20
+ pipeline,
21
+ test_mode=True,
22
+ PCK_threshold_list=[0.05, 0.1, 0.15, 0.2, 0.25]):
23
+ self.image_info = {}
24
+ self.ann_info = {}
25
+
26
+ self.annotations_path = ann_file
27
+ if not img_prefix.endswith('/'):
28
+ img_prefix = img_prefix + '/'
29
+ self.img_prefix = img_prefix
30
+ self.pipeline = pipeline
31
+ self.test_mode = test_mode
32
+ self.PCK_threshold_list = PCK_threshold_list
33
+
34
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
35
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
36
+ self.ann_info['num_joints'] = data_cfg['num_joints']
37
+
38
+ self.ann_info['flip_pairs'] = None
39
+
40
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
41
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
42
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
43
+
44
+ self.db = []
45
+ self.num_shots = 1
46
+ self.paired_samples = []
47
+ self.pipeline = Compose(self.pipeline)
48
+
49
+ @abstractmethod
50
+ def _get_db(self):
51
+ """Load dataset."""
52
+ raise NotImplementedError
53
+
54
+ @abstractmethod
55
+ def _select_kpt(self, obj, kpt_id):
56
+ """Select kpt."""
57
+ raise NotImplementedError
58
+
59
+ @abstractmethod
60
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
61
+ """Evaluate keypoint results."""
62
+ raise NotImplementedError
63
+
64
+ @staticmethod
65
+ def _write_keypoint_results(keypoints, res_file):
66
+ """Write results into a json file."""
67
+
68
+ with open(res_file, 'w') as f:
69
+ json.dump(keypoints, f, sort_keys=True, indent=4)
70
+
71
+ def _report_metric(self,
72
+ res_file,
73
+ metrics):
74
+ """Keypoint evaluation.
75
+
76
+ Args:
77
+ res_file (str): Json file stored prediction results.
78
+ metrics (str | list[str]): Metric to be performed.
79
+ Options: 'PCK', 'PCKh', 'AUC', 'EPE'.
80
+ pck_thr (float): PCK threshold, default as 0.2.
81
+ pckh_thr (float): PCKh threshold, default as 0.7.
82
+ auc_nor (float): AUC normalization factor, default as 30 pixel.
83
+
84
+ Returns:
85
+ List: Evaluation results for evaluation metric.
86
+ """
87
+ info_str = []
88
+
89
+ with open(res_file, 'r') as fin:
90
+ preds = json.load(fin)
91
+ assert len(preds) == len(self.paired_samples)
92
+
93
+ outputs = []
94
+ gts = []
95
+ masks = []
96
+ threshold_bbox = []
97
+ threshold_head_box = []
98
+
99
+ for pred, pair in zip(preds, self.paired_samples):
100
+ item = self.db[pair[-1]]
101
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
102
+ gts.append(np.array(item['joints_3d'])[:, :-1])
103
+
104
+ mask_query = ((np.array(item['joints_3d_visible'])[:, 0]) > 0)
105
+ mask_sample = ((np.array(self.db[pair[0]]['joints_3d_visible'])[:, 0]) > 0)
106
+ for id_s in pair[:-1]:
107
+ mask_sample = np.bitwise_and(mask_sample, ((np.array(self.db[id_s]['joints_3d_visible'])[:, 0]) > 0))
108
+ masks.append(np.bitwise_and(mask_query, mask_sample))
109
+
110
+ if 'PCK' in metrics or 'NME' in metrics or 'AUC' in metrics:
111
+ bbox = np.array(item['bbox'])
112
+ bbox_thr = np.max(bbox[2:])
113
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
114
+ if 'PCKh' in metrics:
115
+ head_box_thr = item['head_size']
116
+ threshold_head_box.append(
117
+ np.array([head_box_thr, head_box_thr]))
118
+
119
+ if 'PCK' in metrics:
120
+ pck_results = dict()
121
+ for pck_thr in self.PCK_threshold_list:
122
+ pck_results[pck_thr] = []
123
+
124
+ for (output, gt, mask, thr_bbox) in zip(outputs, gts, masks, threshold_bbox):
125
+ for pck_thr in self.PCK_threshold_list:
126
+ _, pck, _ = keypoint_pck_accuracy(np.expand_dims(output, 0), np.expand_dims(gt,0), np.expand_dims(mask,0), pck_thr, np.expand_dims(thr_bbox,0))
127
+ pck_results[pck_thr].append(pck)
128
+
129
+ mPCK = 0
130
+ for pck_thr in self.PCK_threshold_list:
131
+ info_str.append(['PCK@' + str(pck_thr), np.mean(pck_results[pck_thr])])
132
+ mPCK += np.mean(pck_results[pck_thr])
133
+ info_str.append(['mPCK', mPCK / len(self.PCK_threshold_list)])
134
+
135
+ if 'NME' in metrics:
136
+ nme_results = []
137
+ for (output, gt, mask, thr_bbox) in zip(outputs, gts, masks, threshold_bbox):
138
+ nme = keypoint_nme(np.expand_dims(output, 0), np.expand_dims(gt,0), np.expand_dims(mask,0), np.expand_dims(thr_bbox,0))
139
+ nme_results.append(nme)
140
+ info_str.append(['NME', np.mean(nme_results)])
141
+
142
+ if 'AUC' in metrics:
143
+ auc_results = []
144
+ for (output, gt, mask, thr_bbox) in zip(outputs, gts, masks, threshold_bbox):
145
+ auc = keypoint_auc(np.expand_dims(output, 0), np.expand_dims(gt,0), np.expand_dims(mask,0), thr_bbox[0])
146
+ auc_results.append(auc)
147
+ info_str.append(['AUC', np.mean(auc_results)])
148
+
149
+ if 'EPE' in metrics:
150
+ epe_results = []
151
+ for (output, gt, mask) in zip(outputs, gts, masks):
152
+ epe = keypoint_epe(np.expand_dims(output, 0), np.expand_dims(gt,0), np.expand_dims(mask,0))
153
+ epe_results.append(epe)
154
+ info_str.append(['EPE', np.mean(epe_results)])
155
+ return info_str
156
+
157
+ def _merge_obj(self, Xs_list, Xq, idx):
158
+ """ merge Xs_list and Xq.
159
+
160
+ :param Xs_list: N-shot samples X
161
+ :param Xq: query X
162
+ :param idx: id of paired_samples
163
+ :return: Xall
164
+ """
165
+ Xall = dict()
166
+ Xall['img_s'] = [Xs['img'] for Xs in Xs_list]
167
+ Xall['target_s'] = [Xs['target'] for Xs in Xs_list]
168
+ Xall['target_weight_s'] = [Xs['target_weight'] for Xs in Xs_list]
169
+ xs_img_metas = [Xs['img_metas'].data for Xs in Xs_list]
170
+
171
+ Xall['img_q'] = Xq['img']
172
+ Xall['target_q'] = Xq['target']
173
+ Xall['target_weight_q'] = Xq['target_weight']
174
+ xq_img_metas = Xq['img_metas'].data
175
+
176
+ img_metas = dict()
177
+ for key in xq_img_metas.keys():
178
+ img_metas['sample_' + key] = [xs_img_meta[key] for xs_img_meta in xs_img_metas]
179
+ img_metas['query_' + key] = xq_img_metas[key]
180
+ img_metas['bbox_id'] = idx
181
+
182
+ Xall['img_metas'] = DC(img_metas, cpu_only=True)
183
+
184
+ return Xall
185
+
186
+ def __len__(self):
187
+ """Get the size of the dataset."""
188
+ return len(self.paired_samples)
189
+
190
+ def __getitem__(self, idx):
191
+ """Get the sample given index."""
192
+
193
+ pair_ids = self.paired_samples[idx] # [num_shots support ids, then the query id]
194
+ assert len(pair_ids) == self.num_shots + 1
195
+ sample_id_list = pair_ids[:self.num_shots]
196
+ query_id = pair_ids[-1]
197
+
198
+ sample_obj_list = []
199
+ for sample_id in sample_id_list:
200
+ sample_obj = copy.deepcopy(self.db[sample_id])
201
+ sample_obj['ann_info'] = copy.deepcopy(self.ann_info)
202
+ sample_obj_list.append(sample_obj)
203
+
204
+ query_obj = copy.deepcopy(self.db[query_id])
205
+ query_obj['ann_info'] = copy.deepcopy(self.ann_info)
206
+
207
+ Xs_list = []
208
+ for sample_obj in sample_obj_list:
209
+ Xs = self.pipeline(sample_obj) # dict with ['img', 'target', 'target_weight', 'img_metas'],
210
+ Xs_list.append(Xs) # Xs['target'] is of shape [100, map_h, map_w]
211
+ Xq = self.pipeline(query_obj)
212
+
213
+ Xall = self._merge_obj(Xs_list, Xq, idx)
214
+ Xall['skeleton'] = self.db[query_id]['skeleton']
215
+
216
+ return Xall
217
+
218
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
219
+ """sort kpts and remove the repeated ones."""
220
+ kpts = sorted(kpts, key=lambda x: x[key])
221
+ num = len(kpts)
222
+ for i in range(num - 1, 0, -1):
223
+ if kpts[i][key] == kpts[i - 1][key]:
224
+ del kpts[i]
225
+
226
+ return kpts
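The `_report_metric` above normalizes every keypoint error by the query bounding box before thresholding (PCK, NME, AUC) and reports raw pixel error for EPE. As a standalone sketch of the per-sample PCK quantity that `keypoint_pck_accuracy` contributes here, assuming the same bbox-based normalization (the `pck_single` helper and the toy arrays are illustrative, not part of the repository):

import numpy as np

def pck_single(pred, gt, mask, thr, norm):
    """Fraction of visible keypoints whose normalized error is below `thr`."""
    # pred, gt: [K, 2]; mask: [K] bool; norm: [2] bbox side lengths used as scale
    dist = np.linalg.norm((pred - gt) / norm, axis=-1)
    if not mask.any():
        return 0.0
    return float((dist[mask] < thr).mean())

pred = np.array([[10., 10.], [52., 48.], [90., 95.]])
gt = np.array([[12., 11.], [50., 50.], [60., 60.]])
mask = np.array([True, True, True])
# PCK@0.2 with a 100 px bbox threshold on both axes: 2 of 3 keypoints are correct
print(pck_single(pred, gt, mask, thr=0.2, norm=np.array([100., 100.])))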
EdgeCape/datasets/datasets/mp100/test_dataset.py ADDED
@@ -0,0 +1,319 @@
1
+ from mmpose.datasets import DATASETS
2
+ import random
3
+ import numpy as np
4
+ import os
5
+ from collections import OrderedDict
6
+ from xtcocotools.coco import COCO
7
+ from .test_base_dataset import TestBaseDataset
8
+
9
+ @DATASETS.register_module()
10
+ class TestPoseDataset(TestBaseDataset):
11
+
12
+ def __init__(self,
13
+ ann_file,
14
+ img_prefix,
15
+ data_cfg,
16
+ pipeline,
17
+ valid_class_ids,
18
+ max_kpt_num=None,
19
+ num_shots=1,
20
+ num_queries=100,
21
+ num_episodes=1,
22
+ pck_threshold_list=[0.05, 0.1, 0.15, 0.20, 0.25],
23
+ test_mode=True):
24
+ super().__init__(
25
+ ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode, PCK_threshold_list=pck_threshold_list)
26
+
27
+ self.ann_info['flip_pairs'] = []
28
+
29
+ self.ann_info['upper_body_ids'] = []
30
+ self.ann_info['lower_body_ids'] = []
31
+
32
+ self.ann_info['use_different_joint_weights'] = False
33
+ self.ann_info['joint_weights'] = np.array([1.,],
34
+ dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
35
+
36
+ self.coco = COCO(ann_file)
37
+
38
+ self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
39
+ self.img_ids = self.coco.getImgIds()
40
+ self.classes = [
41
+ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
42
+ ]
43
+
44
+ self.num_classes = len(self.classes)
45
+ self._class_to_ind = dict(zip(self.classes, self.coco.getCatIds()))
46
+ self._ind_to_class = dict(zip(self.coco.getCatIds(), self.classes))
47
+
48
+ if valid_class_ids is not None: # None by default
49
+ self.valid_class_ids = valid_class_ids
50
+ else:
51
+ self.valid_class_ids = self.coco.getCatIds()
52
+ self.valid_classes = [self._ind_to_class[ind] for ind in self.valid_class_ids]
53
+
54
+ self.cats = self.coco.cats
55
+ self.max_kpt_num = max_kpt_num
56
+
57
+ # Also update self.cat2obj
58
+ self.db = self._get_db()
59
+
60
+ self.num_shots = num_shots
61
+
62
+ if not test_mode:
63
+ # Update every training epoch
64
+ self.random_paired_samples()
65
+ else:
66
+ self.num_queries = num_queries
67
+ self.num_episodes = num_episodes
68
+ self.make_paired_samples()
69
+
70
+
71
+ def random_paired_samples(self):
72
+ num_datas = [len(self.cat2obj[self._class_to_ind[cls]]) for cls in self.valid_classes]
73
+
74
+ # balance the dataset
75
+ max_num_data = max(num_datas)
76
+
77
+ all_samples = []
78
+ for cls in self.valid_class_ids:
79
+ for i in range(max_num_data):
80
+ shot = random.sample(self.cat2obj[cls], self.num_shots + 1)
81
+ all_samples.append(shot)
82
+
83
+ self.paired_samples = np.array(all_samples)
84
+ np.random.shuffle(self.paired_samples)
85
+
86
+ def make_paired_samples(self):
87
+ random.seed(1)
88
+ np.random.seed(0)
89
+
90
+ all_samples = []
91
+ for cls in self.valid_class_ids:
92
+ for _ in range(self.num_episodes):
93
+ shots = random.sample(self.cat2obj[cls], self.num_shots + self.num_queries)
94
+ sample_ids = shots[:self.num_shots]
95
+ query_ids = shots[self.num_shots:]
96
+ for query_id in query_ids:
97
+ all_samples.append(sample_ids + [query_id])
98
+
99
+ self.paired_samples = np.array(all_samples)
100
+
101
+ def _select_kpt(self, obj, kpt_id):
102
+ obj['joints_3d'] = obj['joints_3d'][kpt_id:kpt_id+1]
103
+ obj['joints_3d_visible'] = obj['joints_3d_visible'][kpt_id:kpt_id+1]
104
+ obj['kpt_id'] = kpt_id
105
+
106
+ return obj
107
+
108
+ @staticmethod
109
+ def _get_mapping_id_name(imgs):
110
+ """
111
+ Args:
112
+ imgs (dict): dict of image info.
113
+
114
+ Returns:
115
+ tuple: Image name & id mapping dicts.
116
+
117
+ - id2name (dict): Mapping image id to name.
118
+ - name2id (dict): Mapping image name to id.
119
+ """
120
+ id2name = {}
121
+ name2id = {}
122
+ for image_id, image in imgs.items():
123
+ file_name = image['file_name']
124
+ id2name[image_id] = file_name
125
+ name2id[file_name] = image_id
126
+
127
+ return id2name, name2id
128
+
129
+ def _get_db(self):
130
+ """Ground truth bbox and keypoints."""
131
+ self.obj_id = 0
132
+
133
+ self.cat2obj = {}
134
+ for i in self.coco.getCatIds():
135
+ self.cat2obj.update({i: []})
136
+
137
+ gt_db = []
138
+ for img_id in self.img_ids:
139
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
140
+ return gt_db
141
+
142
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
143
+ """load annotation from COCOAPI.
144
+
145
+ Note:
146
+ bbox:[x1, y1, w, h]
147
+ Args:
148
+ img_id: coco image id
149
+ Returns:
150
+ list[dict]: valid annotation records for the image.
151
+ """
152
+ img_ann = self.coco.loadImgs(img_id)[0]
153
+ width = img_ann['width']
154
+ height = img_ann['height']
155
+
156
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
157
+ objs = self.coco.loadAnns(ann_ids)
158
+
159
+ # sanitize bboxes
160
+ valid_objs = []
161
+ for obj in objs:
162
+ if 'bbox' not in obj:
163
+ continue
164
+ x, y, w, h = obj['bbox']
165
+ x1 = max(0, x)
166
+ y1 = max(0, y)
167
+ x2 = min(width - 1, x1 + max(0, w - 1))
168
+ y2 = min(height - 1, y1 + max(0, h - 1))
169
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
170
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
171
+ valid_objs.append(obj)
172
+ objs = valid_objs
173
+
174
+ bbox_id = 0
175
+ rec = []
176
+ for obj in objs:
177
+ if 'keypoints' not in obj:
178
+ continue
179
+ if max(obj['keypoints']) == 0:
180
+ continue
181
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
182
+ continue
183
+
184
+ category_id = obj['category_id']
185
+ # the number of keypoints for this specific category
186
+ cat_kpt_num = int(len(obj['keypoints']) / 3)
187
+ if self.max_kpt_num is None:
188
+ kpt_num = cat_kpt_num
189
+ else:
190
+ kpt_num = self.max_kpt_num
191
+
192
+ joints_3d = np.zeros((kpt_num, 3), dtype=np.float32)
193
+ joints_3d_visible = np.zeros((kpt_num, 3), dtype=np.float32)
194
+
195
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
196
+ joints_3d[:cat_kpt_num, :2] = keypoints[:, :2]
197
+ joints_3d_visible[:cat_kpt_num, :2] = np.minimum(1, keypoints[:, 2:3])
198
+
199
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
200
+
201
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
202
+
203
+ self.cat2obj[category_id].append(self.obj_id)
204
+
205
+ rec.append({
206
+ 'image_file': image_file,
207
+ 'center': center,
208
+ 'scale': scale,
209
+ 'rotation': 0,
210
+ 'bbox': obj['clean_bbox'][:4],
211
+ 'bbox_score': 1,
212
+ 'joints_3d': joints_3d,
213
+ 'joints_3d_visible': joints_3d_visible,
214
+ 'category_id': category_id,
215
+ 'cat_kpt_num': cat_kpt_num,
216
+ 'bbox_id': self.obj_id,
217
+ 'skeleton': self.coco.cats[obj['category_id']]['skeleton'],
218
+ })
219
+ bbox_id = bbox_id + 1
220
+ self.obj_id += 1
221
+
222
+ return rec
223
+
224
+ def _xywh2cs(self, x, y, w, h):
225
+ """Encode a bbox (x, y, w, h) into (center, scale).
226
+
227
+ Args:
228
+ x, y, w, h
229
+
230
+ Returns:
231
+ tuple: A tuple containing center and scale.
232
+
233
+ - center (np.ndarray[float32](2,)): center of the bbox (x, y).
234
+ - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
235
+ """
236
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info['image_size'][1]
237
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
238
+ #
239
+ # if (not self.test_mode) and np.random.rand() < 0.3:
240
+ # center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
241
+
242
+ if w > aspect_ratio * h:
243
+ h = w * 1.0 / aspect_ratio
244
+ elif w < aspect_ratio * h:
245
+ w = h * aspect_ratio
246
+
247
+ # pixel std is 200.0
248
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
249
+ # padding to include proper amount of context
250
+ scale = scale * 1.25
251
+
252
+ return center, scale
253
+
254
+ def evaluate(self, outputs, res_folder, metric='PCK', **kwargs):
255
+ """Evaluate keypoint results. The pose prediction results
256
+ will be saved in `${res_folder}/result_keypoints.json`.
257
+
258
+ Note:
259
+ batch_size: N
260
+ num_keypoints: K
261
+ heatmap height: H
262
+ heatmap width: W
263
+
264
+ Args:
265
+ outputs (list(preds, boxes, image_path, output_heatmap))
266
+ :preds (np.ndarray[N,K,3]): The first two dimensions are
267
+ coordinates, score is the third dimension of the array.
268
+ :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
269
+ , scale[1],area, score]
270
+ :image_paths (list[str]): For example,
+ ['Capture12/0390_dh_touchROM/cam410209/image62434.jpg']
+ :output_heatmap (np.ndarray[N, K, H, W]): model outputs.
277
+
278
+ res_folder (str): Path of directory to save the results.
279
+ metric (str | list[str]): Metric to be performed.
280
+ Options: 'PCK', 'AUC', 'EPE'.
281
+
282
+ Returns:
283
+ dict: Evaluation results for evaluation metric.
284
+ """
285
+ metrics = metric if isinstance(metric, list) else [metric]
286
+ allowed_metrics = ['PCK', 'AUC', 'EPE', 'NME']
287
+ for metric in metrics:
288
+ if metric not in allowed_metrics:
289
+ raise KeyError(f'metric {metric} is not supported')
290
+
291
+ res_file = os.path.join(res_folder, 'result_keypoints.json')
292
+
293
+ kpts = []
294
+ for output in outputs:
295
+ preds = output['preds']
296
+ boxes = output['boxes']
297
+ image_paths = output['image_paths']
298
+ bbox_ids = output['bbox_ids']
299
+
300
+ batch_size = len(image_paths)
301
+ for i in range(batch_size):
302
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
303
+
304
+ kpts.append({
305
+ 'keypoints': preds[i].tolist(),
306
+ 'center': boxes[i][0:2].tolist(),
307
+ 'scale': boxes[i][2:4].tolist(),
308
+ 'area': float(boxes[i][4]),
309
+ 'score': float(boxes[i][5]),
310
+ 'image_id': image_id,
311
+ 'bbox_id': bbox_ids[i]
312
+ })
313
+ kpts = self._sort_and_unique_bboxes(kpts)
314
+
315
+ self._write_keypoint_results(kpts, res_file)
316
+ info_str = self._report_metric(res_file, metrics)
317
+ name_value = OrderedDict(info_str)
318
+
319
+ return name_value
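For reference, `_xywh2cs` above converts a COCO-style `[x, y, w, h]` box into the (center, scale) pair consumed by the affine pipeline: the box is first padded to the model aspect ratio, then divided by the 200-pixel std and enlarged by 25% for context. A standalone sketch of that arithmetic (the 256x256 input size and the box values are only examples):

import numpy as np

def xywh_to_center_scale(x, y, w, h, image_size=(256, 256)):
    aspect_ratio = image_size[0] / image_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    # pad the shorter side so the box matches the network aspect ratio
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    # pixel std is 200, plus 25% context padding, as in the dataset class
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) * 1.25
    return center, scale

print(xywh_to_center_scale(40, 30, 100, 60))
# center ~ [90., 60.], scale ~ [0.625, 0.625] for a square input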
EdgeCape/datasets/datasets/mp100/transformer_base_dataset.py ADDED
@@ -0,0 +1,209 @@
1
+ import copy
2
+ from abc import ABCMeta, abstractmethod
3
+ import json_tricks as json
4
+ import numpy as np
5
+
6
+ from mmcv.parallel import DataContainer as DC
7
+ from mmpose.core.evaluation.top_down_eval import (keypoint_auc, keypoint_epe,
8
+ keypoint_pck_accuracy)
9
+ from torch.utils.data import Dataset
10
+ from mmpose.datasets import DATASETS
11
+ from mmpose.datasets.pipelines import Compose
12
+
13
+ @DATASETS.register_module()
14
+ class TransformerBaseDataset(Dataset, metaclass=ABCMeta):
15
+
16
+ def __init__(self,
17
+ ann_file,
18
+ img_prefix,
19
+ data_cfg,
20
+ pipeline,
21
+ masking_ratio=0.3,
22
+ test_mode=False):
23
+ self.image_info = {}
24
+ self.ann_info = {}
25
+
26
+ self.annotations_path = ann_file
27
+ if not img_prefix.endswith('/'):
28
+ img_prefix = img_prefix + '/'
29
+ self.img_prefix = img_prefix
30
+ self.pipeline = pipeline
31
+ self.test_mode = test_mode
32
+ self.masking_ratio = masking_ratio
33
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
34
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
35
+ self.ann_info['num_joints'] = data_cfg['num_joints']
36
+
37
+ self.ann_info['flip_pairs'] = None
38
+
39
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
40
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
41
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
42
+
43
+ self.db = []
44
+ self.num_shots = 1
45
+ self.paired_samples = []
46
+ self.pipeline = Compose(self.pipeline)
47
+
48
+ @abstractmethod
49
+ def _get_db(self):
50
+ """Load dataset."""
51
+ raise NotImplementedError
52
+
53
+ @abstractmethod
54
+ def _select_kpt(self, obj, kpt_id):
55
+ """Select kpt."""
56
+ raise NotImplementedError
57
+
58
+ @abstractmethod
59
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
60
+ """Evaluate keypoint results."""
61
+ raise NotImplementedError
62
+
63
+ @staticmethod
64
+ def _write_keypoint_results(keypoints, res_file):
65
+ """Write results into a json file."""
66
+
67
+ with open(res_file, 'w') as f:
68
+ json.dump(keypoints, f, sort_keys=True, indent=4)
69
+
70
+ def _report_metric(self,
71
+ res_file,
72
+ metrics,
73
+ pck_thr=0.2,
74
+ pckh_thr=0.7,
75
+ auc_nor=30):
76
+ """Keypoint evaluation.
77
+
78
+ Args:
79
+ res_file (str): Json file storing prediction results.
80
+ metrics (str | list[str]): Metric to be performed.
81
+ Options: 'PCK', 'PCKh', 'AUC', 'EPE'.
82
+ pck_thr (float): PCK threshold, default as 0.2.
83
+ pckh_thr (float): PCKh threshold, default as 0.7.
84
+ auc_nor (float): AUC normalization factor, default as 30 pixel.
85
+
86
+ Returns:
87
+ List: Evaluation results for evaluation metric.
88
+ """
89
+ info_str = []
90
+
91
+ with open(res_file, 'r') as fin:
92
+ preds = json.load(fin)
93
+ assert len(preds) == len(self.paired_samples)
94
+
95
+ outputs = []
96
+ gts = []
97
+ masks = []
98
+ threshold_bbox = []
99
+ threshold_head_box = []
100
+
101
+ for pred, pair in zip(preds, self.paired_samples):
102
+ item = self.db[pair[-1]]
103
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
104
+ gts.append(np.array(item['joints_3d'])[:, :-1])
105
+
106
+ mask_query = ((np.array(item['joints_3d_visible'])[:, 0]) > 0)
107
+ mask_sample = ((np.array(self.db[pair[0]]['joints_3d_visible'])[:, 0]) > 0)
108
+ for id_s in pair[:-1]:
109
+ mask_sample = np.bitwise_and(mask_sample, ((np.array(self.db[id_s]['joints_3d_visible'])[:, 0]) > 0))
110
+ masks.append(np.bitwise_and(mask_query, mask_sample))
111
+
112
+ if 'PCK' in metrics:
113
+ bbox = np.array(item['bbox'])
114
+ bbox_thr = np.max(bbox[2:])
115
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
116
+ if 'PCKh' in metrics:
117
+ head_box_thr = item['head_size']
118
+ threshold_head_box.append(
119
+ np.array([head_box_thr, head_box_thr]))
120
+
121
+ if 'PCK' in metrics:
122
+ pck_avg = []
123
+ for (output, gt, mask, thr_bbox) in zip(outputs, gts, masks, threshold_bbox):
124
+ _, pck, _ = keypoint_pck_accuracy(np.expand_dims(output, 0), np.expand_dims(gt,0), np.expand_dims(mask,0), pck_thr, np.expand_dims(thr_bbox,0))
125
+ pck_avg.append(pck)
126
+ info_str.append(('PCK', np.mean(pck_avg)))
127
+
128
+ return info_str
129
+
130
+ def _merge_obj(self, Xs_list, Xq, idx):
131
+ """ merge Xs_list and Xq.
132
+
133
+ :param Xs_list: N-shot samples X
134
+ :param Xq: query X
135
+ :param idx: id of paired_samples
136
+ :return: Xall
137
+ """
138
+ Xall = dict()
139
+ Xall['img_s'] = [Xs['img'] for Xs in Xs_list]
140
+ Xall['target_s'] = [Xs['target'] for Xs in Xs_list]
141
+ Xall['target_weight_s'] = [Xs['target_weight'] for Xs in Xs_list]
142
+ xs_img_metas = [Xs['img_metas'].data for Xs in Xs_list]
143
+
144
+ Xall['img_q'] = Xq['img']
145
+ Xall['target_q'] = Xq['target']
146
+ Xall['target_weight_q'] = Xq['target_weight']
147
+ xq_img_metas = Xq['img_metas'].data
148
+
149
+ img_metas = dict()
150
+ for key in xq_img_metas.keys():
151
+ img_metas['sample_' + key] = [xs_img_meta[key] for xs_img_meta in xs_img_metas]
152
+ img_metas['query_' + key] = xq_img_metas[key]
153
+ img_metas['bbox_id'] = idx
154
+
155
+ Xall['img_metas'] = DC(img_metas, cpu_only=True)
156
+
157
+ return Xall
158
+
159
+ def __len__(self):
160
+ """Get the size of the dataset."""
161
+ return len(self.paired_samples)
162
+
163
+ def __getitem__(self, idx):
164
+ """Get the sample given index."""
165
+
166
+ pair_ids = self.paired_samples[idx] # [num_shots support ids, then the query id]
167
+ assert len(pair_ids) == self.num_shots + 1
168
+ sample_id_list = pair_ids[:self.num_shots]
169
+ query_id = pair_ids[-1]
170
+
171
+ sample_obj_list = []
172
+ for sample_id in sample_id_list:
173
+ sample_obj = copy.deepcopy(self.db[sample_id])
174
+ sample_obj['ann_info'] = copy.deepcopy(self.ann_info)
175
+ sample_obj_list.append(sample_obj)
176
+
177
+ query_obj = copy.deepcopy(self.db[query_id])
178
+ query_obj['ann_info'] = copy.deepcopy(self.ann_info)
179
+
180
+ Xs_list = []
181
+ for sample_obj in sample_obj_list:
182
+ Xs = self.pipeline(sample_obj) # dict with ['img', 'target', 'target_weight', 'img_metas'],
183
+ Xs_list.append(Xs) # Xs['target'] is of shape [100, map_h, map_w]
184
+ Xq = self.pipeline(query_obj)
185
+
186
+ Xall = self._merge_obj(Xs_list, Xq, idx)
187
+ Xall['skeleton'] = self.db[query_id]['skeleton']
188
+ Xall['rand_mask'] = self.rand_mask(Xall['target_weight_s'])
189
+ return Xall
190
+
191
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
192
+ """sort kpts and remove the repeated ones."""
193
+ kpts = sorted(kpts, key=lambda x: x[key])
194
+ num = len(kpts)
195
+ for i in range(num - 1, 0, -1):
196
+ if kpts[i][key] == kpts[i - 1][key]:
197
+ del kpts[i]
198
+
199
+ return kpts
200
+
201
+ def rand_mask(self, target_weight_s):
202
+ mask_s = target_weight_s[0]
203
+ for target_weight in target_weight_s:
204
+ mask_s = mask_s * target_weight
205
+ num_to_mask = int(np.sum(mask_s) * self.masking_ratio)
206
+ true_indices = np.where(mask_s == 1)[0]
207
+ rand_mask = np.random.permutation(true_indices)[:num_to_mask]
208
+ mask_s[rand_mask] = 0
209
+ return mask_s
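`rand_mask` above intersects the visibility weights of all support shots and then zeroes out roughly `masking_ratio` of the keypoints that stay visible, so the model has to predict part of the annotated keypoints without support evidence. A rough numpy sketch of the same behaviour; the seeded generator and the toy weights exist only to make the illustration reproducible:

import numpy as np

def rand_mask(target_weight_s, masking_ratio=0.3, seed=0):
    """Zero a random subset of the keypoints visible in all support shots."""
    rng = np.random.default_rng(seed)                 # seeded only for this demo
    mask_s = target_weight_s[0].copy()
    for target_weight in target_weight_s[1:]:
        mask_s = mask_s * target_weight               # keep kpts visible everywhere
    num_to_mask = int(mask_s.sum() * masking_ratio)
    visible = np.where(mask_s[:, 0] == 1)[0]
    mask_s[rng.permutation(visible)[:num_to_mask]] = 0
    return mask_s

weights = [np.ones((10, 1), dtype=np.float32), np.ones((10, 1), dtype=np.float32)]
weights[1][7] = 0                                     # keypoint 7 hidden in one shot
print(rand_mask(weights).ravel())                     # 9 visible kpts, 2 masked out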
EdgeCape/datasets/datasets/mp100/transformer_dataset.py ADDED
@@ -0,0 +1,319 @@
1
+ from mmpose.datasets import DATASETS
2
+ import random
3
+ import numpy as np
4
+ import os
5
+ from collections import OrderedDict
6
+ from xtcocotools.coco import COCO
7
+ from .transformer_base_dataset import TransformerBaseDataset
8
+
9
+ @DATASETS.register_module()
10
+ class TransformerPoseDataset(TransformerBaseDataset):
11
+
12
+ def __init__(self,
13
+ ann_file,
14
+ img_prefix,
15
+ data_cfg,
16
+ pipeline,
17
+ valid_class_ids,
18
+ max_kpt_num=None,
19
+ num_shots=1,
20
+ num_queries=100,
21
+ num_episodes=1,
22
+ test_mode=False):
23
+ super().__init__(
24
+ ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)
25
+
26
+ self.ann_info['flip_pairs'] = []
27
+
28
+ self.ann_info['upper_body_ids'] = []
29
+ self.ann_info['lower_body_ids'] = []
30
+
31
+ self.ann_info['use_different_joint_weights'] = False
32
+ self.ann_info['joint_weights'] = np.array([1.,],
33
+ dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
34
+
35
+ self.coco = COCO(ann_file)
36
+
37
+ self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
38
+ self.img_ids = self.coco.getImgIds()
39
+ self.classes = [
40
+ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
41
+ ]
42
+
43
+ self.num_classes = len(self.classes)
44
+ self._class_to_ind = dict(zip(self.classes, self.coco.getCatIds()))
45
+ self._ind_to_class = dict(zip(self.coco.getCatIds(), self.classes))
46
+
47
+ if valid_class_ids is not None: # None by default
48
+ self.valid_class_ids = valid_class_ids
49
+ else:
50
+ self.valid_class_ids = self.coco.getCatIds()
51
+ self.valid_classes = [self._ind_to_class[ind] for ind in self.valid_class_ids]
52
+
53
+ self.cats = self.coco.cats
54
+ self.max_kpt_num = max_kpt_num
55
+
56
+ # Also update self.cat2obj
57
+ self.db = self._get_db()
58
+
59
+ self.num_shots = num_shots
60
+
61
+ if not test_mode:
62
+ # Update every training epoch
63
+ self.random_paired_samples()
64
+ else:
65
+ self.num_queries = num_queries
66
+ self.num_episodes = num_episodes
67
+ self.make_paired_samples()
68
+
69
+
70
+ def random_paired_samples(self):
71
+ num_datas = [len(self.cat2obj[self._class_to_ind[cls]]) for cls in self.valid_classes]
72
+
73
+ # balance the dataset
74
+ max_num_data = max(num_datas)
75
+
76
+ all_samples = []
77
+ for cls in self.valid_class_ids:
78
+ for i in range(max_num_data):
79
+ shot = random.sample(self.cat2obj[cls], self.num_shots + 1)
80
+ all_samples.append(shot)
81
+
82
+ self.paired_samples = np.array(all_samples)
83
+ np.random.shuffle(self.paired_samples)
84
+
85
+ def make_paired_samples(self):
86
+ random.seed(1)
87
+ np.random.seed(0)
88
+
89
+ all_samples = []
90
+ for cls in self.valid_class_ids:
91
+ for _ in range(self.num_episodes):
92
+ shots = random.sample(self.cat2obj[cls], self.num_shots + self.num_queries)
93
+ sample_ids = shots[:self.num_shots]
94
+ query_ids = shots[self.num_shots:]
95
+ for query_id in query_ids:
96
+ all_samples.append(sample_ids + [query_id])
97
+
98
+ self.paired_samples = np.array(all_samples)
99
+
100
+ def _select_kpt(self, obj, kpt_id):
101
+ obj['joints_3d'] = obj['joints_3d'][kpt_id:kpt_id+1]
102
+ obj['joints_3d_visible'] = obj['joints_3d_visible'][kpt_id:kpt_id+1]
103
+ obj['kpt_id'] = kpt_id
104
+
105
+ return obj
106
+
107
+ @staticmethod
108
+ def _get_mapping_id_name(imgs):
109
+ """
110
+ Args:
111
+ imgs (dict): dict of image info.
112
+
113
+ Returns:
114
+ tuple: Image name & id mapping dicts.
115
+
116
+ - id2name (dict): Mapping image id to name.
117
+ - name2id (dict): Mapping image name to id.
118
+ """
119
+ id2name = {}
120
+ name2id = {}
121
+ for image_id, image in imgs.items():
122
+ file_name = image['file_name']
123
+ id2name[image_id] = file_name
124
+ name2id[file_name] = image_id
125
+
126
+ return id2name, name2id
127
+
128
+ def _get_db(self):
129
+ """Ground truth bbox and keypoints."""
130
+ self.obj_id = 0
131
+
132
+ self.cat2obj = {}
133
+ for i in self.coco.getCatIds():
134
+ self.cat2obj.update({i: []})
135
+
136
+ gt_db = []
137
+ for img_id in self.img_ids:
138
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
139
+
140
+ return gt_db
141
+
142
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
143
+ """load annotation from COCOAPI.
144
+
145
+ Note:
146
+ bbox:[x1, y1, w, h]
147
+ Args:
148
+ img_id: coco image id
149
+ Returns:
150
+ list[dict]: valid annotation records for the image.
151
+ """
152
+ img_ann = self.coco.loadImgs(img_id)[0]
153
+ width = img_ann['width']
154
+ height = img_ann['height']
155
+
156
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
157
+ objs = self.coco.loadAnns(ann_ids)
158
+
159
+ # sanitize bboxes
160
+ valid_objs = []
161
+ for obj in objs:
162
+ if 'bbox' not in obj:
163
+ continue
164
+ x, y, w, h = obj['bbox']
165
+ x1 = max(0, x)
166
+ y1 = max(0, y)
167
+ x2 = min(width - 1, x1 + max(0, w - 1))
168
+ y2 = min(height - 1, y1 + max(0, h - 1))
169
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
170
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
171
+ valid_objs.append(obj)
172
+ objs = valid_objs
173
+
174
+ bbox_id = 0
175
+ rec = []
176
+ for obj in objs:
177
+ if 'keypoints' not in obj:
178
+ continue
179
+ if max(obj['keypoints']) == 0:
180
+ continue
181
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
182
+ continue
183
+
184
+ category_id = obj['category_id']
185
+ # the number of keypoints for this specific category
186
+ cat_kpt_num = int(len(obj['keypoints']) / 3)
187
+ if self.max_kpt_num is None:
188
+ kpt_num = cat_kpt_num
189
+ else:
190
+ kpt_num = self.max_kpt_num
191
+
192
+ joints_3d = np.zeros((kpt_num, 3), dtype=np.float32)
193
+ joints_3d_visible = np.zeros((kpt_num, 3), dtype=np.float32)
194
+
195
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
196
+ joints_3d[:cat_kpt_num, :2] = keypoints[:, :2]
197
+ joints_3d_visible[:cat_kpt_num, :2] = np.minimum(1, keypoints[:, 2:3])
198
+
199
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
200
+
201
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
202
+ if os.path.exists(image_file):
203
+ self.cat2obj[category_id].append(self.obj_id)
204
+
205
+ rec.append({
206
+ 'image_file': image_file,
207
+ 'center': center,
208
+ 'scale': scale,
209
+ 'rotation': 0,
210
+ 'bbox': obj['clean_bbox'][:4],
211
+ 'bbox_score': 1,
212
+ 'joints_3d': joints_3d,
213
+ 'joints_3d_visible': joints_3d_visible,
214
+ 'category_id': category_id,
215
+ 'cat_kpt_num': cat_kpt_num,
216
+ 'bbox_id': self.obj_id,
217
+ 'skeleton': self.coco.cats[obj['category_id']]['skeleton'],
218
+ })
219
+ bbox_id = bbox_id + 1
220
+ self.obj_id += 1
221
+
222
+ return rec
223
+
224
+ def _xywh2cs(self, x, y, w, h):
225
+ """Encode a bbox (x, y, w, h) into (center, scale).
226
+
227
+ Args:
228
+ x, y, w, h
229
+
230
+ Returns:
231
+ tuple: A tuple containing center and scale.
232
+
233
+ - center (np.ndarray[float32](2,)): center of the bbox (x, y).
234
+ - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
235
+ """
236
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info['image_size'][1]
237
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
238
+ #
239
+ # if (not self.test_mode) and np.random.rand() < 0.3:
240
+ # center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
241
+
242
+ if w > aspect_ratio * h:
243
+ h = w * 1.0 / aspect_ratio
244
+ elif w < aspect_ratio * h:
245
+ w = h * aspect_ratio
246
+
247
+ # pixel std is 200.0
248
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
249
+ # padding to include proper amount of context
250
+ scale = scale * 1.25
251
+
252
+ return center, scale
253
+
254
+ def evaluate(self, outputs, res_folder, metric='PCK', **kwargs):
255
+ """Evaluate keypoint results. The pose prediction results
256
+ will be saved in `${res_folder}/result_keypoints.json`.
257
+
258
+ Note:
259
+ batch_size: N
260
+ num_keypoints: K
261
+ heatmap height: H
262
+ heatmap width: W
263
+
264
+ Args:
265
+ outputs (list(preds, boxes, image_path, output_heatmap))
266
+ :preds (np.ndarray[N,K,3]): The first two dimensions are
267
+ coordinates, score is the third dimension of the array.
268
+ :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
269
+ , scale[1],area, score]
270
+ :image_paths (list[str]): For example,
+ ['Capture12/0390_dh_touchROM/cam410209/image62434.jpg']
+ :output_heatmap (np.ndarray[N, K, H, W]): model outputs.
277
+
278
+ res_folder (str): Path of directory to save the results.
279
+ metric (str | list[str]): Metric to be performed.
280
+ Options: 'PCK', 'AUC', 'EPE'.
281
+
282
+ Returns:
283
+ dict: Evaluation results for evaluation metric.
284
+ """
285
+ metrics = metric if isinstance(metric, list) else [metric]
286
+ allowed_metrics = ['PCK', 'AUC', 'EPE', 'NME']
287
+ for metric in metrics:
288
+ if metric not in allowed_metrics:
289
+ raise KeyError(f'metric {metric} is not supported')
290
+
291
+ res_file = os.path.join(res_folder, 'result_keypoints.json')
292
+
293
+ kpts = []
294
+ for output in outputs:
295
+ preds = output['preds']
296
+ boxes = output['boxes']
297
+ image_paths = output['image_paths']
298
+ bbox_ids = output['bbox_ids']
299
+
300
+ batch_size = len(image_paths)
301
+ for i in range(batch_size):
302
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
303
+
304
+ kpts.append({
305
+ 'keypoints': preds[i].tolist(),
306
+ 'center': boxes[i][0:2].tolist(),
307
+ 'scale': boxes[i][2:4].tolist(),
308
+ 'area': float(boxes[i][4]),
309
+ 'score': float(boxes[i][5]),
310
+ 'image_id': image_id,
311
+ 'bbox_id': bbox_ids[i]
312
+ })
313
+ kpts = self._sort_and_unique_bboxes(kpts)
314
+
315
+ self._write_keypoint_results(kpts, res_file)
316
+ info_str = self._report_metric(res_file, metrics)
317
+ name_value = OrderedDict(info_str)
318
+
319
+ return name_value
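`TransformerPoseDataset` is registered in mmpose's `DATASETS` registry, so it is normally built from a config dict rather than constructed directly. A hedged sketch of what such a config could look like; the annotation path and image root are placeholders, and the single-joint channel settings simply mirror the `joint_weights` reshape in `__init__`:

import numpy as np
# from mmpose.datasets import build_dataset  # mmpose 0.x style dataset builder

data_cfg = dict(
    image_size=np.array([256, 256]),
    heatmap_size=np.array([64, 64]),
    num_joints=1,                 # keypoints are processed one channel at a time
    num_output_channels=1,
    dataset_channel=[[0]],
    inference_channel=[0],
)

train_set = dict(
    type='TransformerPoseDataset',
    ann_file='data/mp100/annotations/mp100_split1_train.json',   # placeholder path
    img_prefix='data/mp100/images/',                             # placeholder path
    data_cfg=data_cfg,
    pipeline=[],                  # normally the full training pipeline list
    valid_class_ids=None,         # None falls back to all COCO category ids
    num_shots=1,
)
# dataset = build_dataset(train_set)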
EdgeCape/datasets/pipelines/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ from .top_down_transform import (TopDownAffineFewShot,
2
+ TopDownGenerateTargetFewShot,
3
+ LoadDepthFromFile,
4
+ DepthTopDownAffineFewShot)
5
+
6
+ __all__ = [
7
+ 'TopDownGenerateTargetFewShot', 'TopDownAffineFewShot', 'LoadDepthFromFile', 'DepthTopDownAffineFewShot',
8
+ ]
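The transforms exported here are registered in mmpose's `PIPELINES` registry and are meant to be chained in a pipeline config. A hedged sketch of how the few-shot transforms could sit in such a list; `LoadImageFromFile`, `ToTensor`, `NormalizeTensor` and `Collect` come from mmpose itself, and the normalization constants and meta keys are illustrative:

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownAffineFewShot'),
    dict(type='ToTensor'),
    dict(type='NormalizeTensor',
         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTargetFewShot', sigma=2),
    dict(type='Collect',
         keys=['img', 'target', 'target_weight'],
         meta_keys=['image_file', 'center', 'scale', 'rotation', 'bbox_score']),
]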
EdgeCape/datasets/pipelines/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (373 Bytes).
 
EdgeCape/datasets/pipelines/__pycache__/post_transforms.cpython-39.pyc ADDED
Binary file (3.39 kB).
 
EdgeCape/datasets/pipelines/__pycache__/top_down_transform.cpython-39.pyc ADDED
Binary file (18 kB).
 
EdgeCape/datasets/pipelines/post_transforms.py ADDED
@@ -0,0 +1,121 @@
1
+ # ------------------------------------------------------------------------------
2
+ # Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
3
+ # Original licence: Copyright (c) Microsoft, under the MIT License.
4
+ # ------------------------------------------------------------------------------
5
+
6
+ import cv2
7
+ import numpy as np
8
+
9
+
10
+ def get_affine_transform(center,
11
+ scale,
12
+ rot,
13
+ output_size,
14
+ shift=(0., 0.),
15
+ inv=False):
16
+ """Get the affine transform matrix, given the center/scale/rot/output_size.
17
+
18
+ Args:
19
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
20
+ scale (np.ndarray[2, ]): Scale of the bounding box
21
+ wrt [width, height].
22
+ rot (float): Rotation angle (degree).
23
+ output_size (np.ndarray[2, ]): Size of the destination heatmaps.
24
+ shift (0-100%): Shift translation ratio wrt the width/height.
25
+ Default (0., 0.).
26
+ inv (bool): Option to inverse the affine transform direction.
27
+ (inv=False: src->dst or inv=True: dst->src)
28
+
29
+ Returns:
30
+ np.ndarray: The transform matrix.
31
+ """
32
+ assert len(center) == 2
33
+ assert len(scale) == 2
34
+ assert len(output_size) == 2
35
+ assert len(shift) == 2
36
+
37
+ # pixel_std is 200.
38
+ scale_tmp = scale * 200.0
39
+
40
+ shift = np.array(shift)
41
+ src_w = scale_tmp[0]
42
+ dst_w = output_size[0]
43
+ dst_h = output_size[1]
44
+
45
+ rot_rad = np.pi * rot / 180
46
+ src_dir = rotate_point([0., src_w * -0.5], rot_rad)
47
+ dst_dir = np.array([0., dst_w * -0.5])
48
+
49
+ src = np.zeros((3, 2), dtype=np.float32)
50
+ src[0, :] = center + scale_tmp * shift
51
+ src[1, :] = center + src_dir + scale_tmp * shift
52
+ src[2, :] = _get_3rd_point(src[0, :], src[1, :])
53
+
54
+ dst = np.zeros((3, 2), dtype=np.float32)
55
+ dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
56
+ dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
57
+ dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
58
+
59
+ if inv:
60
+ trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
61
+ else:
62
+ trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
63
+
64
+ return trans
65
+
66
+
67
+ def affine_transform(pt, trans_mat):
68
+ """Apply an affine transformation to the points.
69
+
70
+ Args:
71
+ pt (np.ndarray): a 2 dimensional point to be transformed
72
+ trans_mat (np.ndarray): 2x3 matrix of an affine transform
73
+
74
+ Returns:
75
+ np.ndarray: Transformed points.
76
+ """
77
+ assert len(pt) == 2
78
+ new_pt = np.array(trans_mat) @ np.array([pt[0], pt[1], 1.])
79
+
80
+ return new_pt
81
+
82
+
83
+ def _get_3rd_point(a, b):
84
+ """To calculate the affine matrix, three pairs of points are required. This
85
+ function is used to get the 3rd point, given 2D points a & b.
86
+
87
+ The 3rd point is defined by rotating vector `a - b` by 90 degrees
88
+ anticlockwise, using b as the rotation center.
89
+
90
+ Args:
91
+ a (np.ndarray): point(x,y)
92
+ b (np.ndarray): point(x,y)
93
+
94
+ Returns:
95
+ np.ndarray: The 3rd point.
96
+ """
97
+ assert len(a) == 2
98
+ assert len(b) == 2
99
+ direction = a - b
100
+ third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
101
+
102
+ return third_pt
103
+
104
+
105
+ def rotate_point(pt, angle_rad):
106
+ """Rotate a point by an angle.
107
+
108
+ Args:
109
+ pt (list[float]): 2 dimensional point to be rotated
110
+ angle_rad (float): rotation angle by radian
111
+
112
+ Returns:
113
+ list[float]: Rotated point.
114
+ """
115
+ assert len(pt) == 2
116
+ sn, cs = np.sin(angle_rad), np.cos(angle_rad)
117
+ new_x = pt[0] * cs - pt[1] * sn
118
+ new_y = pt[0] * sn + pt[1] * cs
119
+ rotated_pt = [new_x, new_y]
120
+
121
+ return rotated_pt
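A quick usage sketch of the helpers above, assuming the package and its mmpose/cv2 dependencies are importable as uploaded in this commit: `get_affine_transform` builds the 2x3 matrix that warps the padded box onto the network input, and `affine_transform` maps single keypoints with the same matrix. The dummy image and box values are arbitrary:

import cv2
import numpy as np
from EdgeCape.datasets.pipelines.post_transforms import (affine_transform,
                                                         get_affine_transform)

center = np.array([128., 96.], dtype=np.float32)    # bbox centre in the source image
scale = np.array([1.0, 1.0], dtype=np.float32)      # bbox size / 200, pre-padded
output_size = np.array([256, 256])

trans = get_affine_transform(center, scale, 0., output_size)
img = np.zeros((480, 640, 3), dtype=np.uint8)       # dummy source image
crop = cv2.warpAffine(img, trans, (256, 256), flags=cv2.INTER_LINEAR)

kpt = affine_transform(np.array([128., 96.]), trans)
print(crop.shape, kpt)                               # (256, 256, 3), ~[128. 128.]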
EdgeCape/datasets/pipelines/top_down_transform.py ADDED
@@ -0,0 +1,716 @@
1
+ import os
2
+ import warnings
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ import cv2
7
+ import mmcv
8
+ import numpy as np
9
+ from mmcv import fileio
10
+
11
+ from mmpose.datasets.builder import PIPELINES
12
+ from mmpose.core.post_processing import (affine_transform, fliplr_joints,
+ get_affine_transform, get_warp_matrix,
+ warp_affine_joints)
17
+
18
+ @PIPELINES.register_module()
19
+ class TopDownAffineFewShot:
20
+ """Affine transform the image to make input.
21
+
22
+ Required keys:'img', 'joints_3d', 'joints_3d_visible', 'ann_info','scale',
23
+ 'rotation' and 'center'. Modified keys:'img', 'joints_3d', and
24
+ 'joints_3d_visible'.
25
+
26
+ Args:
27
+ use_udp (bool): To use unbiased data processing.
28
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
29
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
30
+ """
31
+
32
+ def __init__(self, use_udp=False):
33
+ self.use_udp = use_udp
34
+
35
+ def __call__(self, results):
36
+ image_size = results['ann_info']['image_size']
37
+
38
+ img = results['img']
39
+ joints_3d = results['joints_3d']
40
+ joints_3d_visible = results['joints_3d_visible']
41
+ c = results['center']
42
+ s = results['scale']
43
+ r = results['rotation']
44
+
45
+ if self.use_udp:
46
+ trans = get_warp_matrix(r, c * 2.0, image_size - 1.0, s * 200.0)
47
+ img = cv2.warpAffine(
48
+ img,
49
+ trans, (int(image_size[0]), int(image_size[1])),
50
+ flags=cv2.INTER_LINEAR)
51
+ joints_3d[:, 0:2] = \
52
+ warp_affine_joints(joints_3d[:, 0:2].copy(), trans)
53
+ else:
54
+ trans = get_affine_transform(c, s, r, image_size)
55
+ img = cv2.warpAffine(
56
+ img,
57
+ trans, (int(image_size[0]), int(image_size[1])),
58
+ flags=cv2.INTER_LINEAR)
59
+ for i in range(len(joints_3d)):
60
+ if joints_3d_visible[i, 0] > 0.0:
61
+ joints_3d[i, 0:2] = affine_transform(joints_3d[i, 0:2], trans)
62
+
63
+ results['img'] = img
64
+ results['joints_3d'] = joints_3d
65
+ results['joints_3d_visible'] = joints_3d_visible
66
+
67
+ return results
68
+
69
+
70
+ @PIPELINES.register_module()
71
+ class TopDownGenerateTargetFewShot:
72
+ """Generate the target heatmap.
73
+
74
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
75
+ Modified keys: 'target', and 'target_weight'.
76
+
77
+ Args:
78
+ sigma: Sigma of heatmap gaussian for 'MSRA' approach.
79
+ kernel: Kernel of heatmap gaussian for 'Megvii' approach.
80
+ encoding (str): Approach to generate target heatmaps.
81
+ Currently supported approaches: 'MSRA', 'Megvii', 'UDP'.
82
+ Default:'MSRA'
83
+
84
+ unbiased_encoding (bool): Option to use unbiased
85
+ encoding methods.
86
+ Paper ref: Zhang et al. Distribution-Aware Coordinate
87
+ Representation for Human Pose Estimation (CVPR 2020).
88
+ keypoint_pose_distance: Keypoint pose distance for UDP.
89
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
90
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
91
+ target_type (str): supported targets: 'GaussianHeatMap',
92
+ 'CombinedTarget'. Default:'GaussianHeatMap'
93
+ CombinedTarget: The combination of classification target
94
+ (response map) and regression target (offset map).
95
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
96
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
97
+ """
98
+
99
+ def __init__(self,
100
+ sigma=2,
101
+ kernel=(11, 11),
102
+ valid_radius_factor=0.0546875,
103
+ target_type='GaussianHeatMap',
104
+ encoding='MSRA',
105
+ unbiased_encoding=False):
106
+ self.sigma = sigma
107
+ self.unbiased_encoding = unbiased_encoding
108
+ self.kernel = kernel
109
+ self.valid_radius_factor = valid_radius_factor
110
+ self.target_type = target_type
111
+ self.encoding = encoding
112
+
113
+ def _msra_generate_target(self, cfg, joints_3d, joints_3d_visible, sigma):
114
+ """Generate the target heatmap via "MSRA" approach.
115
+
116
+ Args:
117
+ cfg (dict): data config
118
+ joints_3d: np.ndarray ([num_joints, 3])
119
+ joints_3d_visible: np.ndarray ([num_joints, 3])
120
+ sigma: Sigma of heatmap gaussian
121
+ Returns:
122
+ tuple: A tuple containing targets.
123
+
124
+ - target: Target heatmaps.
125
+ - target_weight: (1: visible, 0: invisible)
126
+ """
127
+ num_joints = len(joints_3d)
128
+ image_size = cfg['image_size']
129
+ W, H = cfg['heatmap_size']
130
+ joint_weights = cfg['joint_weights']
131
+ use_different_joint_weights = cfg['use_different_joint_weights']
132
+ assert not use_different_joint_weights
133
+
134
+ target_weight = np.zeros((num_joints, 1), dtype=np.float32)
135
+ target = np.zeros((num_joints, H, W), dtype=np.float32)
136
+
137
+ # 3-sigma rule
138
+ tmp_size = sigma * 3
139
+
140
+ if self.unbiased_encoding:
141
+ for joint_id in range(num_joints):
142
+ target_weight[joint_id] = joints_3d_visible[joint_id, 0]
143
+
144
+ feat_stride = image_size / [W, H]
145
+ mu_x = joints_3d[joint_id][0] / feat_stride[0]
146
+ mu_y = joints_3d[joint_id][1] / feat_stride[1]
147
+ # Check that any part of the gaussian is in-bounds
148
+ ul = [mu_x - tmp_size, mu_y - tmp_size]
149
+ br = [mu_x + tmp_size + 1, mu_y + tmp_size + 1]
150
+ if ul[0] >= W or ul[1] >= H or br[0] < 0 or br[1] < 0:
151
+ target_weight[joint_id] = 0
152
+
153
+ if target_weight[joint_id] == 0:
154
+ continue
155
+
156
+ x = np.arange(0, W, 1, np.float32)
157
+ y = np.arange(0, H, 1, np.float32)
158
+ y = y[:, None]
159
+
160
+ if target_weight[joint_id] > 0.5:
161
+ target[joint_id] = np.exp(-((x - mu_x)**2 +
162
+ (y - mu_y)**2) /
163
+ (2 * sigma**2))
164
+ else:
165
+ for joint_id in range(num_joints):
166
+ target_weight[joint_id] = joints_3d_visible[joint_id, 0]
167
+
168
+ feat_stride = image_size / [W, H]
169
+ mu_x = int(joints_3d[joint_id][0] / feat_stride[0] + 0.5)
170
+ mu_y = int(joints_3d[joint_id][1] / feat_stride[1] + 0.5)
171
+ # Check that any part of the gaussian is in-bounds
172
+ ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
173
+ br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
174
+ if ul[0] >= W or ul[1] >= H or br[0] < 0 or br[1] < 0:
175
+ target_weight[joint_id] = 0
176
+
177
+ if target_weight[joint_id] > 0.5:
178
+ size = 2 * tmp_size + 1
179
+ x = np.arange(0, size, 1, np.float32)
180
+ y = x[:, None]
181
+ x0 = y0 = size // 2
182
+ # The gaussian is not normalized,
183
+ # we want the center value to equal 1
184
+ g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
185
+
186
+ # Usable gaussian range
187
+ g_x = max(0, -ul[0]), min(br[0], W) - ul[0]
188
+ g_y = max(0, -ul[1]), min(br[1], H) - ul[1]
189
+ # Image range
190
+ img_x = max(0, ul[0]), min(br[0], W)
191
+ img_y = max(0, ul[1]), min(br[1], H)
192
+
193
+ target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
194
+ g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
195
+
196
+ if use_different_joint_weights:
197
+ target_weight = np.multiply(target_weight, joint_weights)
198
+
199
+ return target, target_weight
200
+
201
+ def _udp_generate_target(self, cfg, joints_3d, joints_3d_visible, factor,
202
+ target_type):
203
+ """Generate the target heatmap via 'UDP' approach. Paper ref: Huang et
204
+ al. The Devil is in the Details: Delving into Unbiased Data Processing
205
+ for Human Pose Estimation (CVPR 2020).
206
+
207
+ Note:
208
+ num keypoints: K
209
+ heatmap height: H
210
+ heatmap width: W
211
+ num target channels: C
212
+ C = K if target_type=='GaussianHeatMap'
213
+ C = 3*K if target_type=='CombinedTarget'
214
+
215
+ Args:
216
+ cfg (dict): data config
217
+ joints_3d (np.ndarray[K, 3]): Annotated keypoints.
218
+ joints_3d_visible (np.ndarray[K, 3]): Visibility of keypoints.
219
+ factor (float): kernel factor for GaussianHeatMap target or
220
+ valid radius factor for CombinedTarget.
221
+ target_type (str): 'GaussianHeatMap' or 'CombinedTarget'.
222
+ GaussianHeatMap: Heatmap target with gaussian distribution.
223
+ CombinedTarget: The combination of classification target
224
+ (response map) and regression target (offset map).
225
+
226
+ Returns:
227
+ tuple: A tuple containing targets.
228
+
229
+ - target (np.ndarray[C, H, W]): Target heatmaps.
230
+ - target_weight (np.ndarray[K, 1]): (1: visible, 0: invisible)
231
+ """
232
+ num_joints = len(joints_3d)
233
+ image_size = cfg['image_size']
234
+ heatmap_size = cfg['heatmap_size']
235
+ joint_weights = cfg['joint_weights']
236
+ use_different_joint_weights = cfg['use_different_joint_weights']
237
+ assert not use_different_joint_weights
238
+
239
+ target_weight = np.ones((num_joints, 1), dtype=np.float32)
240
+ target_weight[:, 0] = joints_3d_visible[:, 0]
241
+
242
+ assert target_type in ['GaussianHeatMap', 'CombinedTarget']
243
+
244
+ if target_type == 'GaussianHeatMap':
245
+ target = np.zeros((num_joints, heatmap_size[1], heatmap_size[0]),
246
+ dtype=np.float32)
247
+
248
+ tmp_size = factor * 3
249
+
250
+ # prepare for gaussian
251
+ size = 2 * tmp_size + 1
252
+ x = np.arange(0, size, 1, np.float32)
253
+ y = x[:, None]
254
+
255
+ for joint_id in range(num_joints):
256
+ feat_stride = (image_size - 1.0) / (heatmap_size - 1.0)
257
+ mu_x = int(joints_3d[joint_id][0] / feat_stride[0] + 0.5)
258
+ mu_y = int(joints_3d[joint_id][1] / feat_stride[1] + 0.5)
259
+ # Check that any part of the gaussian is in-bounds
260
+ ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
261
+ br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
262
+ if ul[0] >= heatmap_size[0] or ul[1] >= heatmap_size[1] \
263
+ or br[0] < 0 or br[1] < 0:
264
+ # If not, just return the image as is
265
+ target_weight[joint_id] = 0
266
+ continue
267
+
268
+ # # Generate gaussian
269
+ mu_x_ac = joints_3d[joint_id][0] / feat_stride[0]
270
+ mu_y_ac = joints_3d[joint_id][1] / feat_stride[1]
271
+ x0 = y0 = size // 2
272
+ x0 += mu_x_ac - mu_x
273
+ y0 += mu_y_ac - mu_y
274
+ g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * factor**2))
275
+
276
+ # Usable gaussian range
277
+ g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0]
278
+ g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1]
279
+ # Image range
280
+ img_x = max(0, ul[0]), min(br[0], heatmap_size[0])
281
+ img_y = max(0, ul[1]), min(br[1], heatmap_size[1])
282
+
283
+ v = target_weight[joint_id]
284
+ if v > 0.5:
285
+ target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
286
+ g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
287
+ elif target_type == 'CombinedTarget':
288
+ target = np.zeros(
289
+ (num_joints, 3, heatmap_size[1] * heatmap_size[0]),
290
+ dtype=np.float32)
291
+ feat_width = heatmap_size[0]
292
+ feat_height = heatmap_size[1]
293
+ feat_x_int = np.arange(0, feat_width)
294
+ feat_y_int = np.arange(0, feat_height)
295
+ feat_x_int, feat_y_int = np.meshgrid(feat_x_int, feat_y_int)
296
+ feat_x_int = feat_x_int.flatten()
297
+ feat_y_int = feat_y_int.flatten()
298
+ # Calculate the radius of the positive area in classification
299
+ # heatmap.
300
+ valid_radius = factor * heatmap_size[1]
301
+ feat_stride = (image_size - 1.0) / (heatmap_size - 1.0)
302
+ for joint_id in range(num_joints):
303
+ mu_x = joints_3d[joint_id][0] / feat_stride[0]
304
+ mu_y = joints_3d[joint_id][1] / feat_stride[1]
305
+ x_offset = (mu_x - feat_x_int) / valid_radius
306
+ y_offset = (mu_y - feat_y_int) / valid_radius
307
+ dis = x_offset**2 + y_offset**2
308
+ keep_pos = np.where(dis <= 1)[0]
309
+ v = target_weight[joint_id]
310
+ if v > 0.5:
311
+ target[joint_id, 0, keep_pos] = 1
312
+ target[joint_id, 1, keep_pos] = x_offset[keep_pos]
313
+ target[joint_id, 2, keep_pos] = y_offset[keep_pos]
314
+ target = target.reshape(num_joints * 3, heatmap_size[1],
315
+ heatmap_size[0])
316
+
317
+ if use_different_joint_weights:
318
+ target_weight = np.multiply(target_weight, joint_weights)
319
+
320
+ return target, target_weight
321
+
322
+ def __call__(self, results):
323
+ """Generate the target heatmap."""
324
+ joints_3d = results['joints_3d']
325
+ joints_3d_visible = results['joints_3d_visible']
326
+
327
+ assert self.encoding in ['MSRA', 'UDP']
328
+
329
+ if self.encoding == 'MSRA':
330
+ if isinstance(self.sigma, list):
331
+ num_sigmas = len(self.sigma)
332
+ cfg = results['ann_info']
333
+ num_joints = len(joints_3d)
334
+ heatmap_size = cfg['heatmap_size']
335
+
336
+ target = np.empty(
337
+ (0, num_joints, heatmap_size[1], heatmap_size[0]),
338
+ dtype=np.float32)
339
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
340
+ for i in range(num_sigmas):
341
+ target_i, target_weight_i = self._msra_generate_target(
342
+ cfg, joints_3d, joints_3d_visible, self.sigma[i])
343
+ target = np.concatenate([target, target_i[None]], axis=0)
344
+ target_weight = np.concatenate(
345
+ [target_weight, target_weight_i[None]], axis=0)
346
+ else:
347
+ target, target_weight = self._msra_generate_target(
348
+ results['ann_info'], joints_3d, joints_3d_visible,
349
+ self.sigma)
350
+ elif self.encoding == 'UDP':
351
+ if self.target_type == 'CombinedTarget':
352
+ factors = self.valid_radius_factor
353
+ channel_factor = 3
354
+ elif self.target_type == 'GaussianHeatMap':
355
+ factors = self.sigma
356
+ channel_factor = 1
357
+ if isinstance(factors, list):
358
+ num_factors = len(factors)
359
+ cfg = results['ann_info']
360
+ num_joints = len(joints_3d)
361
+ W, H = cfg['heatmap_size']
362
+
363
+ target = np.empty((0, channel_factor * num_joints, H, W),
364
+ dtype=np.float32)
365
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
366
+ for i in range(num_factors):
367
+ target_i, target_weight_i = self._udp_generate_target(
368
+ cfg, joints_3d, joints_3d_visible, factors[i],
369
+ self.target_type)
370
+ target = np.concatenate([target, target_i[None]], axis=0)
371
+ target_weight = np.concatenate(
372
+ [target_weight, target_weight_i[None]], axis=0)
373
+ else:
374
+ target, target_weight = self._udp_generate_target(
375
+ results['ann_info'], joints_3d, joints_3d_visible, factors,
376
+ self.target_type)
377
+ else:
378
+ raise ValueError(
379
+ f'Encoding approach {self.encoding} is not supported!')
380
+
381
+ results['target'] = target
382
+ results['target_weight'] = target_weight
383
+
384
+ return results
385
+
386
+ @PIPELINES.register_module()
387
+ class LoadDepthFromFile:
388
+ """Load depthmap from file.
389
+
390
+ Required Keys:
391
+
392
+ - depth_path
393
+
394
+ Modified Keys:
395
+
396
+ - depth
397
+
398
+ Args:
399
+ to_float32 (bool): Whether to convert the loaded depth to a float32
400
+ numpy array. If set to False, the loaded depth is an uint8 array.
401
+ Defaults to False.
402
+ color_type (str): The flag argument for :func:`mmcv.imfrombytes`.
403
+ Defaults to 'color'.
404
+ imdecode_backend (str): The depth decoding backend type. The backend
405
+ argument for :func:`mmcv.imfrombytes`.
406
+ See :func:`mmcv.imfrombytes` for details.
407
+ Defaults to 'cv2'.
408
+ file_client_args (dict, optional): Arguments to instantiate a
409
+ FileClient. See :class:`mmengine.fileio.FileClient` for details.
410
+ Defaults to None. It will be deprecated in future. Please use
411
+ ``backend_args`` instead.
412
+ Deprecated in version 2.0.0rc4.
413
+ ignore_empty (bool): Whether to allow loading empty depth or file path
414
+ not existent. Defaults to False.
415
+ backend_args (dict, optional): Instantiates the corresponding file
416
+ backend. It may contain `backend` key to specify the file
417
+ backend. If it contains, the file backend corresponding to this
418
+ value will be used and initialized with the remaining values,
419
+ otherwise the corresponding file backend will be selected
420
+ based on the prefix of the file path. Defaults to None.
421
+ New in version 2.0.0rc4.
422
+ """
423
+
424
+ def __init__(self,
425
+ to_float32=False,
426
+ color_type='color',
427
+ channel_order='rgb',
428
+ file_client_args=dict(backend='disk')):
429
+ self.to_float32 = to_float32
430
+ self.color_type = color_type
431
+ self.channel_order = channel_order
432
+ self.file_client_args = file_client_args.copy()
433
+ self.file_client = None
434
+
435
+ def _read_depth(self, path):
436
+ img = np.load(path)['depth']
437
+ if img is None:
438
+ raise ValueError(f'Fail to read {path}')
439
+ if self.to_float32:
440
+ img = img.astype(np.float32)
441
+ return img
442
+
443
+ def __call__(self, results: dict) -> Optional[dict]:
444
+ """Functions to load depth.
445
+
446
+ Args:
447
+ results (dict): Result dict from
448
+ :class:`mmengine.dataset.BaseDataset`.
449
+
450
+ Returns:
451
+ dict: The dict contains loaded depth and meta information.
452
+ """
453
+
454
+ """Loading depth(s) from file."""
455
+ if self.file_client is None:
456
+ self.file_client = mmcv.FileClient(**self.file_client_args)
457
+
458
+ depth_file = results.get('depth_file', None)
459
+ # Replace file extension with npy
460
+ pre, ext = os.path.splitext(depth_file)
461
+ depth_file = pre + '.npz'
462
+ if isinstance(depth_file, (list, tuple)):
463
+ # Load depths from a list of paths
464
+ results['depth'] = [self._read_depth(path) for path in depth_file]
465
+ elif depth_file is not None:
466
+ # Load single depth from path
467
+ results['depth'] = self._read_depth(depth_file)
468
+ else:
469
+ if 'depth' not in results:
470
+ # If `depth_file` is not given, a pre-loaded depth map must already
+ # be present in `results` (e.g. set manually outside the pipeline).
+ raise KeyError('Either `depth_file` or `depth` should exist in '
+ 'results.')
475
+ if isinstance(results['depth'], (list, tuple)):
476
+ assert isinstance(results['depth'][0], np.ndarray)
477
+ else:
478
+ assert isinstance(results['depth'], np.ndarray)
479
+ results['depth_file'] = None
480
+
481
+ return results
482
+
483
+ def __repr__(self):
484
+ repr_str = (f'{self.__class__.__name__}('
485
+ f'to_float32={self.to_float32}, '
486
+ f"color_type='{self.color_type}', "
487
+ f'file_client_args={self.file_client_args})')
488
+ return repr_str
489
+
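+ # Minimal usage sketch (assumed pipeline config; transform names other than
+ # those defined in this file follow mmpose conventions and are illustrative):
+ # train_pipeline = [
+ #     dict(type='LoadImageFromFile'),
+ #     dict(type='LoadDepthFromFile', to_float32=True),
+ #     dict(type='DepthTopDownAffineFewShot', use_udp=True),
+ # ]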
490
+
491
+ @PIPELINES.register_module()
492
+ class DepthTopDownAffineFewShot:
493
+ """Affine transform the image to make input.
494
+
495
+ Required keys: 'img', 'depth', 'joints_3d', 'joints_3d_visible', 'ann_info',
+ 'scale', 'rotation' and 'center'. Modified keys: 'img', 'depth', 'joints_3d',
+ and 'joints_3d_visible'.
498
+
499
+ Args:
500
+ use_udp (bool): To use unbiased data processing.
501
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
502
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
503
+ """
504
+
505
+ def __init__(self, use_udp=False):
506
+ self.use_udp = use_udp
507
+
508
+ def __call__(self, results):
509
+ image_size = results['ann_info']['image_size']
510
+
511
+ img = results['img']
512
+ depth = results['depth']
513
+ joints_3d = results['joints_3d']
514
+ joints_3d_visible = results['joints_3d_visible']
515
+ c = results['center']
516
+ s = results['scale']
517
+ r = results['rotation']
518
+
519
+ if self.use_udp:
520
+ trans = get_warp_matrix(r, c * 2.0, image_size - 1.0, s * 200.0)
521
+ img = cv2.warpAffine(
522
+ img,
523
+ trans, (int(image_size[0]), int(image_size[1])),
524
+ flags=cv2.INTER_LINEAR)
525
+ depth = cv2.warpAffine(
526
+ depth,
527
+ trans, (int(image_size[0]), int(image_size[1])),
528
+ flags=cv2.INTER_LINEAR)
529
+ joints_3d[:, 0:2] = warp_affine_joints(joints_3d[:, 0:2].copy(), trans)
530
+ else:
531
+ trans = get_affine_transform(c, s, r, image_size)
532
+ img = cv2.warpAffine(
533
+ img,
534
+ trans, (int(image_size[0]), int(image_size[1])),
535
+ flags=cv2.INTER_LINEAR)
536
+ depth = cv2.warpAffine(
537
+ depth,
538
+ trans, (int(image_size[0]), int(image_size[1])),
539
+ flags=cv2.INTER_LINEAR)
540
+ for i in range(len(joints_3d)):
541
+ if joints_3d_visible[i, 0] > 0.0:
542
+ joints_3d[i, 0:2] = affine_transform(joints_3d[i, 0:2], trans)
543
+
544
+ results['img'] = img
545
+ results['depth'] = depth
546
+ results['joints_3d'] = joints_3d
547
+ results['joints_3d_visible'] = joints_3d_visible
548
+
549
+ return results
550
+
551
+
552
+
553
+
554
+ @PIPELINES.register_module()
555
+ class LoadFeatFromFile:
556
+ """Load depthmap from file.
557
+
558
+ Required Keys:
559
+
560
+ - depth_path
561
+
562
+ Modified Keys:
563
+
564
+ - depth
565
+
566
+ Args:
567
+ to_float32 (bool): Whether to convert the loaded depth to a float32
568
+ numpy array. If set to False, the loaded depth is an uint8 array.
569
+ Defaults to False.
570
+ color_type (str): The flag argument for :func:`mmcv.imfrombytes`.
571
+ Defaults to 'color'.
572
+ imdecode_backend (str): The depth decoding backend type. The backend
573
+ argument for :func:`mmcv.imfrombytes`.
574
+ See :func:`mmcv.imfrombytes` for details.
575
+ Defaults to 'cv2'.
576
+ file_client_args (dict, optional): Arguments to instantiate a
577
+ FileClient. See :class:`mmengine.fileio.FileClient` for details.
578
+ Defaults to None. It will be deprecated in future. Please use
579
+ ``backend_args`` instead.
580
+ Deprecated in version 2.0.0rc4.
581
+ ignore_empty (bool): Whether to allow loading empty depth or file path
582
+ not existent. Defaults to False.
583
+ backend_args (dict, optional): Instantiates the corresponding file
584
+ backend. It may contain `backend` key to specify the file
585
+ backend. If it contains, the file backend corresponding to this
586
+ value will be used and initialized with the remaining values,
587
+ otherwise the corresponding file backend will be selected
588
+ based on the prefix of the file path. Defaults to None.
589
+ New in version 2.0.0rc4.
590
+ """
591
+
592
+ def __init__(self,
593
+ to_float32=False,
594
+ color_type='color',
595
+ channel_order='rgb',
596
+ file_client_args=dict(backend='disk')):
597
+ self.to_float32 = to_float32
598
+ self.color_type = color_type
599
+ self.channel_order = channel_order
600
+ self.file_client_args = file_client_args.copy()
601
+ self.file_client = None
602
+
603
+ def _read_depth(self, path):
604
+ img = np.load(path)['feat']
605
+ if img is None:
606
+ raise ValueError(f'Fail to read {path}')
607
+ if self.to_float32:
608
+ img = img.astype(np.float32)
609
+ return img
610
+
611
+ def __call__(self, results: dict) -> Optional[dict]:
612
+ """Functions to load depth.
613
+
614
+ Args:
615
+ results (dict): Result dict from
616
+ :class:`mmengine.dataset.BaseDataset`.
617
+
618
+ Returns:
619
+ dict: The dict contains the loaded feature map and meta information.
620
+ """
621
+
622
+ """Loading depth(s) from file."""
623
+ if self.file_client is None:
624
+ self.file_client = mmcv.FileClient(**self.file_client_args)
625
+
626
+ feat_file = results.get('feat_file', None)
+ if isinstance(feat_file, (list, tuple)):
+ # Load feature maps from a list of paths; features are stored as
+ # compressed numpy archives, so swap the extension to .npz first.
+ feat_file = [os.path.splitext(p)[0] + '.npz' for p in feat_file]
+ results['feat'] = [self._read_depth(path) for path in feat_file]
+ elif feat_file is not None:
+ # Load a single feature map from its .npz path.
+ feat_file = os.path.splitext(feat_file)[0] + '.npz'
+ results['feat'] = self._read_depth(feat_file)
636
+ else:
637
+ if 'feat' not in results:
+ # If `feat_file` is not given, a pre-loaded feature map must already
+ # be present in `results` (e.g. set manually outside the pipeline).
+ raise KeyError('Either `feat_file` or `feat` should exist in results.')
642
+ if isinstance(results['feat'], (list, tuple)):
643
+ assert isinstance(results['feat'][0], np.ndarray)
644
+ else:
645
+ assert isinstance(results['feat'], np.ndarray)
646
+ results['feat_file'] = None
647
+
648
+ return results
649
+
650
+ def __repr__(self):
651
+ repr_str = (f'{self.__class__.__name__}('
652
+ f'to_float32={self.to_float32}, '
653
+ f"color_type='{self.color_type}', "
654
+ f'file_client_args={self.file_client_args})')
655
+ return repr_str
656
+
657
+
658
+ @PIPELINES.register_module()
659
+ class FeatTopDownAffineFewShot:
660
+ """Affine transform the image to make input.
661
+
662
+ Required keys: 'img', 'feat', 'joints_3d', 'joints_3d_visible', 'ann_info',
+ 'scale', 'rotation' and 'center'. Modified keys: 'img', 'depth' (which receives
+ the warped feature map), 'joints_3d', and 'joints_3d_visible'.
665
+
666
+ Args:
667
+ use_udp (bool): To use unbiased data processing.
668
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
669
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
670
+ """
671
+
672
+ def __init__(self, use_udp=False):
673
+ self.use_udp = use_udp
674
+
675
+ def __call__(self, results):
676
+ image_size = results['ann_info']['image_size']
677
+
678
+ img = results['img']
679
+ feat = results['feat']
680
+ joints_3d = results['joints_3d']
681
+ joints_3d_visible = results['joints_3d_visible']
682
+ c = results['center']
683
+ s = results['scale']
684
+ r = results['rotation']
685
+
686
+ if self.use_udp:
687
+ trans = get_warp_matrix(r, c * 2.0, image_size - 1.0, s * 200.0)
688
+ img = cv2.warpAffine(
689
+ img,
690
+ trans, (int(image_size[0]), int(image_size[1])),
691
+ flags=cv2.INTER_LINEAR)
692
+ feat = cv2.warpAffine(
693
+ feat,
694
+ trans, (int(image_size[0]), int(image_size[1])),
695
+ flags=cv2.INTER_LINEAR)
696
+ joints_3d[:, 0:2] = warp_affine_joints(joints_3d[:, 0:2].copy(), trans)
697
+ else:
698
+ trans = get_affine_transform(c, s, r, image_size)
699
+ img = cv2.warpAffine(
700
+ img,
701
+ trans, (int(image_size[0]), int(image_size[1])),
702
+ flags=cv2.INTER_LINEAR)
703
+ feat = cv2.warpAffine(
704
+ feat,
705
+ trans, (int(image_size[0]), int(image_size[1])),
706
+ flags=cv2.INTER_LINEAR)
707
+ for i in range(len(joints_3d)):
708
+ if joints_3d_visible[i, 0] > 0.0:
709
+ joints_3d[i, 0:2] = affine_transform(joints_3d[i, 0:2], trans)
710
+
711
+ results['img'] = img
712
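+ # Note: the warped feature map is stored under the 'depth' key, apparently so
+ # that downstream steps written for depth maps can consume it unchanged.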
+ results['depth'] = feat
713
+ results['joints_3d'] = joints_3d
714
+ results['joints_3d_visible'] = joints_3d_visible
715
+
716
+ return results
EdgeCape/models/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .detectors import * # noqa
2
+ from .keypoint_heads import * # noqa
3
+ from .backbones import * # noqa
EdgeCape/models/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (227 Bytes). View file
 
EdgeCape/models/backbones/__pycache__/adapter.cpython-39.pyc ADDED
Binary file (27.8 kB). View file
 
EdgeCape/models/backbones/__pycache__/dino.cpython-39.pyc ADDED
Binary file (5.48 kB). View file
 
EdgeCape/models/backbones/adapter.py ADDED
@@ -0,0 +1,935 @@
1
+
2
+ import torch.nn.functional as F
3
+ import fvcore.nn.weight_init as weight_init
4
+ import numpy as np
5
+ import torch
6
+ import torch.nn as nn
7
+ from torch.nn.functional import interpolate
8
+
9
+ """
10
+ Code is based on: https://github.com/mbanani/probe3d
11
+ """
12
+
13
+
14
+ class SurfaceNormalHead(nn.Module):
15
+ def __init__(
16
+ self,
17
+ feat_dim,
18
+ head_type="multiscale",
19
+ uncertainty_aware=False,
20
+ hidden_dim=512,
21
+ kernel_size=1,
22
+ ):
23
+ super().__init__()
24
+
25
+ self.uncertainty_aware = uncertainty_aware
26
+ output_dim = 4 if uncertainty_aware else 3
27
+
28
+ self.kernel_size = kernel_size
29
+
30
+ assert head_type in ["linear", "multiscale", "dpt"]
31
+ name = f"snorm_{head_type}_k{kernel_size}"
32
+ self.name = f"{name}_UA" if uncertainty_aware else name
33
+
34
+ if head_type == "linear":
35
+ self.head = Linear(feat_dim, output_dim, kernel_size)
36
+ elif head_type == "multiscale":
37
+ self.head = MultiscaleHead(feat_dim, output_dim, hidden_dim, kernel_size)
38
+ elif head_type == "dpt":
39
+ self.head = DPT(feat_dim, output_dim, hidden_dim, kernel_size)
40
+ else:
41
+ raise ValueError(f"Unknown head type: {self.head_type}")
42
+
43
+ def forward(self, feats):
44
+ return self.head(feats)
45
+
46
+
47
+ class DepthHead(nn.Module):
48
+ def __init__(
49
+ self,
50
+ feat_dim,
51
+ head_type="multiscale",
52
+ min_depth=0.001,
53
+ max_depth=10,
54
+ prediction_type="bindepth",
55
+ hidden_dim=512,
56
+ kernel_size=1,
57
+ ):
58
+ super().__init__()
59
+
60
+ self.kernel_size = kernel_size
61
+ self.name = f"{prediction_type}_{head_type}_k{kernel_size}"
62
+
63
+ if prediction_type == "bindepth":
64
+ output_dim = 256
65
+ self.predict = DepthBinPrediction(min_depth, max_depth, n_bins=output_dim)
66
+ elif prediction_type == "sigdepth":
67
+ output_dim = 1
68
+ self.predict = DepthSigmoidPrediction(min_depth, max_depth)
69
+ else:
70
+ raise ValueError()
71
+
72
+ if head_type == "linear":
73
+ self.head = Linear(feat_dim, output_dim, kernel_size)
74
+ elif head_type == "multiscale":
75
+ self.head = MultiscaleHead(feat_dim, output_dim, hidden_dim, kernel_size)
76
+ elif head_type == "dpt":
77
+ self.head = DPT(feat_dim, output_dim, hidden_dim, kernel_size)
78
+ else:
79
+ raise ValueError(f"Unknown head type: {self.head_type}")
80
+
81
+ def forward(self, feats):
82
+ """Prediction each pixel."""
83
+ feats = self.head(feats)
84
+ depth = self.predict(feats)
85
+ return depth
86
+
87
+
88
+ class DepthBinPrediction(nn.Module):
89
+ def __init__(
90
+ self,
91
+ min_depth=0.001,
92
+ max_depth=10,
93
+ n_bins=256,
94
+ bins_strategy="UD",
95
+ norm_strategy="linear",
96
+ ):
97
+ super().__init__()
98
+ self.n_bins = n_bins
99
+ self.min_depth = min_depth
100
+ self.max_depth = max_depth
101
+ self.norm_strategy = norm_strategy
102
+ self.bins_strategy = bins_strategy
103
+
104
+ def forward(self, prob):
105
+ if self.bins_strategy == "UD":
106
+ bins = torch.linspace(
107
+ self.min_depth, self.max_depth, self.n_bins, device=prob.device
108
+ )
109
+ elif self.bins_strategy == "SID":
110
+ bins = torch.logspace(
111
+ self.min_depth, self.max_depth, self.n_bins, device=prob.device
112
+ )
113
+
114
+ # following Adabins, default linear
115
+ if self.norm_strategy == "linear":
116
+ prob = torch.relu(prob)
117
+ eps = 0.1
118
+ prob = prob + eps
119
+ prob = prob / prob.sum(dim=1, keepdim=True)
120
+ elif self.norm_strategy == "softmax":
121
+ prob = torch.softmax(prob, dim=1)
122
+ elif self.norm_strategy == "sigmoid":
123
+ prob = torch.sigmoid(prob)
124
+ prob = prob / prob.sum(dim=1, keepdim=True)
125
+
126
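+ # Expected depth per pixel: probability-weighted sum over the bin centres,
+ # d = sum_k p_k * b_k (Adabins-style soft binning).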
+ depth = torch.einsum("ikhw,k->ihw", [prob, bins])
127
+ depth = depth.unsqueeze(dim=1)
128
+ return depth
129
+
130
+
131
+ class DepthSigmoidPrediction(nn.Module):
132
+ def __init__(self, min_depth=0.001, max_depth=10):
133
+ super().__init__()
134
+ self.min_depth = min_depth
135
+ self.max_depth = max_depth
136
+
137
+ def forward(self, pred):
138
+ depth = pred.sigmoid()
139
+ depth = self.min_depth + depth * (self.max_depth - self.min_depth)
140
+ return depth
141
+
142
+
143
+ class FeatureFusionBlock(nn.Module):
144
+ def __init__(self, features, kernel_size, with_skip=True):
145
+ super().__init__()
146
+ self.with_skip = with_skip
147
+ if self.with_skip:
148
+ self.resConfUnit1 = ResidualConvUnit(features, kernel_size)
149
+
150
+ self.resConfUnit2 = ResidualConvUnit(features, kernel_size)
151
+
152
+ def forward(self, x, skip_x=None):
153
+ if skip_x is not None:
154
+ assert self.with_skip and skip_x.shape == x.shape
155
+ x = self.resConfUnit1(x) + skip_x
156
+
157
+ x = self.resConfUnit2(x)
158
+ return x
159
+
160
+
161
+ class ResidualConvUnit(nn.Module):
162
+ def __init__(self, features, kernel_size):
163
+ super().__init__()
164
+ assert kernel_size % 2 == 1, "Kernel size needs to be odd"
165
+ padding = kernel_size // 2
166
+ self.conv = nn.Sequential(
167
+ nn.Conv2d(features, features, kernel_size, padding=padding),
168
+ nn.ReLU(True),
169
+ nn.Conv2d(features, features, kernel_size, padding=padding),
170
+ nn.ReLU(True),
171
+ )
172
+
173
+ def forward(self, x):
174
+ return self.conv(x) + x
175
+
176
+
177
+ class DPT(nn.Module):
178
+ def __init__(self, input_dims, output_dim, hidden_dim=512, kernel_size=3, hr=False, swin=False):
179
+ super().__init__()
180
+ assert len(input_dims) == 4
181
+ self.hr = hr
182
+ self.conv_0 = nn.Conv2d(input_dims[0], hidden_dim, 1, padding=0)
183
+ self.conv_1 = nn.Conv2d(input_dims[1], hidden_dim, 1, padding=0)
184
+ self.conv_2 = nn.Conv2d(input_dims[2], hidden_dim, 1, padding=0)
185
+ self.conv_3 = nn.Conv2d(input_dims[3], hidden_dim, 1, padding=0)
186
+
187
+ self.ref_0 = FeatureFusionBlock(hidden_dim, kernel_size)
188
+ self.ref_1 = FeatureFusionBlock(hidden_dim, kernel_size)
189
+ self.ref_2 = FeatureFusionBlock(hidden_dim, kernel_size)
190
+ self.ref_3 = FeatureFusionBlock(hidden_dim, kernel_size, with_skip=False)
191
+
192
+ self.out_conv = nn.Sequential(
193
+ nn.Conv2d(hidden_dim, hidden_dim, 3, padding=1),
194
+ nn.ReLU(True),
195
+ nn.Conv2d(hidden_dim, output_dim, 3, padding=1),
196
+ )
197
+
198
+ if swin:
199
+ self.scale_factor = [1, 2, 4, 4]
200
+ else:
201
+ self.scale_factor = [2, 2, 2, 2]
202
+
203
+ def forward(self, features):
204
+ """Prediction each pixel."""
205
+ assert len(features) == 4
206
+ feats = features.copy()
207
+ feats[0] = self.conv_0(feats[0])
208
+ feats[1] = self.conv_1(feats[1])
209
+ feats[2] = self.conv_2(feats[2])
210
+ feats[3] = self.conv_3(feats[3])
211
+
212
+ feats = [interpolate(x, scale_factor=scale_factor) for x, scale_factor in zip(feats, self.scale_factor)]
213
+
214
+ out = self.ref_3(feats[3], None)
215
+ out = self.ref_2(feats[2], out)
216
+ out = self.ref_1(feats[1], out)
217
+ out = self.ref_0(feats[0], out)
218
+ if not self.hr:
219
+ return self.out_conv(out)
220
+ out = interpolate(out, scale_factor=4)
221
+ out = self.out_conv(out)
222
+ # out = interpolate(out, scale_factor=2)
223
+ return out
224
+
225
+
226
+ def make_conv(input_dim, hidden_dim, output_dim, num_layers, kernel_size=1):
+ # Stack `num_layers` Conv2d layers (hidden_dim falls back to output_dim
+ # when None) with ReLU activations between them.
+ hidden_dim = output_dim if hidden_dim is None else hidden_dim
+ dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
+ layers = []
+ for i in range(num_layers):
+ layers.append(nn.Conv2d(dims[i], dims[i + 1], kernel_size, padding=kernel_size // 2))
+ if i < num_layers - 1:
+ layers.append(nn.ReLU(True))
+ conv = nn.Sequential(*layers)
+ return conv
228
+
229
+
230
+ class Linear(nn.Module):
231
+ def __init__(self, input_dim, output_dim, kernel_size=1):
232
+ super().__init__()
233
+ if type(input_dim) is not int:
234
+ input_dim = sum(input_dim)
235
+
236
+ assert type(input_dim) is int
237
+ padding = kernel_size // 2
238
+ self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, padding=padding)
239
+
240
+ def forward(self, feats):
241
+ if type(feats) is list:
242
+ feats = torch.cat(feats, dim=1)
243
+
244
+ feats = interpolate(feats, scale_factor=4, mode="bilinear")
245
+ return self.conv(feats)
246
+
247
+
248
+ class MultiscaleHead(nn.Module):
249
+ def __init__(self, input_dims, output_dim, hidden_dim=512, kernel_size=1):
250
+ super().__init__()
251
+
252
+ self.convs = nn.ModuleList(
253
+ [make_conv(in_d, None, hidden_dim, 1, kernel_size) for in_d in input_dims]
254
+ )
255
+ interm_dim = len(input_dims) * hidden_dim
256
+ self.conv_mid = make_conv(interm_dim, hidden_dim, hidden_dim, 3, kernel_size)
257
+ self.conv_out = make_conv(hidden_dim, hidden_dim, output_dim, 2, kernel_size)
258
+
259
+ def forward(self, feats):
260
+ num_feats = len(feats)
261
+ feats = [self.convs[i](feats[i]) for i in range(num_feats)]
262
+
263
+ h, w = feats[-1].shape[-2:]
264
+ feats = [interpolate(feat, (h, w), mode="bilinear") for feat in feats]
265
+ feats = torch.cat(feats, dim=1).relu()
266
+
267
+ # upsample
268
+ feats = interpolate(feats, scale_factor=2, mode="bilinear")
269
+ feats = self.conv_mid(feats).relu()
270
+ feats = interpolate(feats, scale_factor=4, mode="bilinear")
271
+ return self.conv_out(feats)
272
+
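+ # Usage sketch (assumed shapes): given four feature maps whose channel counts
+ # match `input_dims`, MultiscaleHead projects each one, resizes them to the
+ # last map's spatial size, concatenates them, then upsamples by 2x and by 4x,
+ # so the output is 8x the spatial size of the last input feature map.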
273
+ def get_norm(norm, out_channels, num_norm_groups=32):
274
+ """
275
+ Args:
276
+ norm (str or callable): currently only "GN" (GroupNorm) as a string;
277
+ or a callable that takes a channel number and returns
278
+ the normalization layer as a nn.Module.
279
+ Returns:
280
+ nn.Module or None: the normalization layer
281
+ """
282
+ if norm is None:
283
+ return None
284
+ if isinstance(norm, str):
285
+ if len(norm) == 0:
286
+ return None
287
+ norm = {
288
+ "GN": lambda channels: nn.GroupNorm(num_norm_groups, channels),
289
+ }[norm]
290
+ return norm(out_channels)
291
+
292
+
293
+ def get_activation(activation):
294
+ """
295
+ Args:
296
+ activation (str or callable): either one of relu, lrelu, prelu, leaky_relu,
297
+ sigmoid, tanh, elu, selu, swish, mish; or a callable that takes a
298
+ tensor and returns a tensor.
299
+ Returns:
300
+ nn.Module or None: the activation layer
301
+ """
302
+ if activation is None:
303
+ return None
304
+ if isinstance(activation, str):
305
+ if len(activation) == 0:
306
+ return None
307
+ activation = {
308
+ "relu": nn.ReLU,
309
+ "lrelu": nn.LeakyReLU,
310
+ "prelu": nn.PReLU,
311
+ "leaky_relu": nn.LeakyReLU,
312
+ "sigmoid": nn.Sigmoid,
313
+ "tanh": nn.Tanh,
314
+ "elu": nn.ELU,
315
+ "selu": nn.SELU,
316
+ }[activation]
317
+ return activation()
318
+
319
+
320
+ # SCE crisscross + diags
321
+ class EfficientSpatialContextNet(nn.Module):
322
+ def __init__(self, kernel_size=7, in_channels=768, out_channels=768, use_cuda=True):
323
+ super(EfficientSpatialContextNet, self).__init__()
324
+ self.kernel_size = kernel_size
325
+ self.pad = kernel_size // 2
326
+ self.conv = torch.nn.Conv2d(
327
+ in_channels + 4 * self.kernel_size,
328
+ out_channels,
329
+ 1,
330
+ bias=True,
331
+ padding_mode="zeros",
332
+ )
333
+
334
+ if use_cuda:
335
+ self.conv = self.conv.cuda()
336
+
337
+ def forward(self, feature):
338
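+ # For every pixel, collect cosine similarities with neighbours along the two
+ # diagonals, the centre column and the centre row of a k x k window
+ # (4 * kernel_size context channels), then fuse them with the 1x1 conv above.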
+ b, c, h, w = feature.size()
339
+ feature_normalized = F.normalize(feature, p=2, dim=1)
340
+ feature_pad = F.pad(
341
+ feature_normalized, (self.pad, self.pad, self.pad, self.pad), "constant", 0
342
+ )
343
+ output = torch.zeros(
344
+ [4 * self.kernel_size, b, h, w],
345
+ dtype=feature.dtype,
346
+ requires_grad=feature.requires_grad,
347
+ )
348
+ if feature.is_cuda:
349
+ output = output.cuda(feature.get_device())
350
+
351
+ # left-top to right-bottom
352
+ for i in range(self.kernel_size):
353
+ c = i
354
+ r = i
355
+ output[i] = (feature_pad[:, :, r: (h + r), c: (w + c)] * feature_normalized).sum(1)
356
+
357
+ # col
358
+ for i in range(self.kernel_size):
359
+ c = self.kernel_size // 2
360
+ r = i
361
+ output[1 * self.kernel_size + i] = (feature_pad[:, :, r: (h + r), c: (w + c)] * feature_normalized).sum(1)
362
+
363
+ # right-top to left-bottom
364
+ for i in range(self.kernel_size):
365
+ c = (self.kernel_size - 1) - i
366
+ r = i
367
+ output[2 * self.kernel_size + i] = (feature_pad[:, :, r: (h + r), c: (w + c)] * feature_normalized).sum(1)
368
+
369
+ # row
370
+ for i in range(self.kernel_size):
371
+ c = i
372
+ r = self.kernel_size // 2
373
+ output[3 * self.kernel_size + i] = (feature_pad[:, :, r: (h + r), c: (w + c)] * feature_normalized).sum(1)
374
+
375
+ output = output.transpose(0, 1).contiguous()
376
+ output = torch.cat((feature, output), 1)
377
+ output = self.conv(output)
378
+ # output = F.relu(output)
379
+
380
+ return output
381
+
382
+
383
+ class Conv2d(nn.Conv2d):
384
+ """
385
+ A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features.
386
+ """
387
+
388
+ def __init__(self, *args, **kwargs):
389
+ """
390
+ Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:
391
+ Args:
392
+ norm (nn.Module, optional): a normalization layer
393
+ activation (callable(Tensor) -> Tensor): a callable activation function
394
+ It assumes that norm layer is used before activation.
395
+ """
396
+ norm = kwargs.pop("norm", None)
397
+ activation = kwargs.pop("activation", None)
398
+ super().__init__(*args, **kwargs)
399
+
400
+ self.norm = norm
401
+ self.activation = activation
402
+
403
+ def forward(self, x):
404
+ x = F.conv2d(
405
+ x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups
406
+ )
407
+ if self.norm is not None:
408
+ x = self.norm(x)
409
+ if self.activation is not None:
410
+ x = self.activation(x)
411
+ return x
412
+
413
+
414
+ class CNNBlockBase(nn.Module):
415
+ """
416
+ A CNN block is assumed to have input channels, output channels and a stride.
417
+ The input and output of `forward()` method must be NCHW tensors.
418
+ The method can perform arbitrary computation but must match the given
419
+ channels and stride specification.
420
+ Attribute:
421
+ in_channels (int):
422
+ out_channels (int):
423
+ stride (int):
424
+ """
425
+
426
+ def __init__(self, in_channels, out_channels, stride):
427
+ """
428
+ The `__init__` method of any subclass should also contain these arguments.
429
+ Args:
430
+ in_channels (int):
431
+ out_channels (int):
432
+ stride (int):
433
+ """
434
+ super().__init__()
435
+ self.in_channels = in_channels
436
+ self.out_channels = out_channels
437
+ self.stride = stride
438
+
439
+
440
+ class BottleneckBlock(CNNBlockBase):
441
+ """
442
+ The standard bottleneck residual block used by ResNet-50, 101 and 152
443
+ defined in :paper:`ResNet`. It contains 3 conv layers with kernels
444
+ 1x1, 3x3, 1x1, and a projection shortcut if needed.
445
+ """
446
+
447
+ def __init__(
448
+ self,
449
+ in_channels,
450
+ out_channels,
451
+ *,
452
+ bottleneck_channels,
453
+ stride=1,
454
+ num_groups=1,
455
+ norm="GN",
456
+ stride_in_1x1=False,
457
+ dilation=1,
458
+ num_norm_groups=32,
459
+ kernel_size=(1, 3, 1)
460
+ ):
461
+ """
462
+ Args:
463
+ bottleneck_channels (int): number of output channels for the 3x3
464
+ "bottleneck" conv layers.
465
+ num_groups (int): number of groups for the 3x3 conv layer.
466
+ norm (str or callable): normalization for all conv layers.
467
+ See :func:`layers.get_norm` for supported format.
468
+ stride_in_1x1 (bool): when stride>1, whether to put stride in the
469
+ first 1x1 convolution or the bottleneck 3x3 convolution.
470
+ dilation (int): the dilation rate of the 3x3 conv layer.
471
+ """
472
+ super().__init__(in_channels, out_channels, stride)
473
+
474
+ if in_channels != out_channels:
475
+ self.shortcut = Conv2d(
476
+ in_channels,
477
+ out_channels,
478
+ kernel_size=1,
479
+ stride=stride,
480
+ bias=False,
481
+ norm=get_norm(norm, out_channels, num_norm_groups),
482
+ )
483
+ else:
484
+ self.shortcut = None
485
+
486
+ # The original MSRA ResNet models have stride in the first 1x1 conv
487
+ # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
488
+ # stride in the 3x3 conv
489
+ stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
490
+
491
+ self.conv1 = Conv2d(
492
+ in_channels,
493
+ bottleneck_channels,
494
+ kernel_size=kernel_size[0],
495
+ stride=stride_1x1,
496
+ padding=(kernel_size[0] - 1) // 2,
497
+ bias=False,
498
+ norm=get_norm(norm, bottleneck_channels, num_norm_groups),
499
+ )
500
+
501
+ self.conv2 = Conv2d(
502
+ bottleneck_channels,
503
+ bottleneck_channels,
504
+ kernel_size=kernel_size[1],
505
+ stride=stride_3x3,
506
+ padding=dilation * (kernel_size[1] - 1) // 2,
507
+ bias=False,
508
+ groups=num_groups,
509
+ dilation=dilation,
510
+ norm=get_norm(norm, bottleneck_channels, num_norm_groups),
511
+ )
512
+
513
+ self.conv3 = Conv2d(
514
+ bottleneck_channels,
515
+ out_channels,
516
+ kernel_size=kernel_size[2],
517
+ bias=False,
518
+ norm=get_norm(norm, out_channels, num_norm_groups),
519
+ )
520
+
521
+ for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
522
+ if layer is not None: # shortcut can be None
523
+ weight_init.c2_msra_fill(layer)
524
+
525
+ # Zero-initialize the last normalization in each residual branch,
526
+ # so that at the beginning, the residual branch starts with zeros,
527
+ # and each residual block behaves like an identity.
528
+ # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
529
+ # "For BN layers, the learnable scaling coefficient γ is initialized
530
+ # to be 1, except for each residual block's last BN
531
+ # where γ is initialized to be 0."
532
+
533
+ # nn.init.constant_(self.conv3.norm.weight, 0)
534
+ # TODO this somehow hurts performance when training GN models from scratch.
535
+ # Add it as an option when we need to use this code to train a backbone.
536
+
537
+ def forward(self, x):
538
+ out = self.conv1(x)
539
+ out = F.relu_(out)
540
+
541
+ out = self.conv2(out)
542
+ out = F.relu_(out)
543
+
544
+ out = self.conv3(out)
545
+
546
+ if self.shortcut is not None:
547
+ shortcut = self.shortcut(x)
548
+ else:
549
+ shortcut = x
550
+
551
+ out += shortcut
552
+ out = F.relu_(out)
553
+ return out
554
+
555
+
556
+ class ResNet(nn.Module):
557
+ """
558
+ Implement :paper:`ResNet`.
559
+ """
560
+
561
+ def __init__(self, stem, stages, num_classes=None, out_features=None, freeze_at=0):
562
+ """
563
+ Args:
564
+ stem (nn.Module): a stem module
565
+ stages (list[list[CNNBlockBase]]): several (typically 4) stages,
566
+ each contains multiple :class:`CNNBlockBase`.
567
+ num_classes (None or int): if None, will not perform classification.
568
+ Otherwise, will create a linear layer.
569
+ out_features (list[str]): name of the layers whose outputs should
570
+ be returned in forward. Can be anything in "stem", "linear", or "res2" ...
571
+ If None, will return the output of the last layer.
572
+ freeze_at (int): The number of stages at the beginning to freeze.
573
+ see :meth:`freeze` for detailed explanation.
574
+ """
575
+ super().__init__()
576
+ self.stem = stem
577
+ self.num_classes = num_classes
578
+
579
+ current_stride = self.stem.stride
580
+ self._out_feature_strides = {"stem": current_stride}
581
+ self._out_feature_channels = {"stem": self.stem.out_channels}
582
+
583
+ self.stage_names, self.stages = [], []
584
+
585
+ if out_features is not None:
586
+ # Avoid keeping unused layers in this module. They consume extra memory
587
+ # and may cause allreduce to fail
588
+ num_stages = max(
589
+ [{"res2": 1, "res3": 2, "res4": 3, "res5": 4}.get(f, 0) for f in out_features]
590
+ )
591
+ stages = stages[:num_stages]
592
+ for i, blocks in enumerate(stages):
593
+ assert len(blocks) > 0, len(blocks)
594
+ for block in blocks:
595
+ assert isinstance(block, CNNBlockBase), block
596
+
597
+ name = "res" + str(i + 2)
598
+ stage = nn.Sequential(*blocks)
599
+
600
+ self.add_module(name, stage)
601
+ self.stage_names.append(name)
602
+ self.stages.append(stage)
603
+
604
+ self._out_feature_strides[name] = current_stride = int(
605
+ current_stride * np.prod([k.stride for k in blocks])
606
+ )
607
+ self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels
608
+ self.stage_names = tuple(self.stage_names) # Make it static for scripting
609
+
610
+ if num_classes is not None:
611
+ self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
612
+ self.linear = nn.Linear(curr_channels, num_classes)
613
+
614
+ # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
615
+ # "The 1000-way fully-connected layer is initialized by
616
+ # drawing weights from a zero-mean Gaussian with standard deviation of 0.01."
617
+ nn.init.normal_(self.linear.weight, std=0.01)
618
+ name = "linear"
619
+
620
+ if out_features is None:
621
+ out_features = [name]
622
+ self._out_features = out_features
623
+ assert len(self._out_features)
624
+ children = [x[0] for x in self.named_children()]
625
+ for out_feature in self._out_features:
626
+ assert out_feature in children, "Available children: {}".format(", ".join(children))
627
+ self.freeze(freeze_at)
628
+
629
+ def forward(self, x):
630
+ """
631
+ Args:
632
+ x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``.
633
+ Returns:
634
+ dict[str->Tensor]: names and the corresponding features
635
+ """
636
+ assert x.dim() == 4, f"ResNet takes an input of shape (N, C, H, W). Got {x.shape} instead!"
637
+ outputs = {}
638
+ x = self.stem(x)
639
+ if "stem" in self._out_features:
640
+ outputs["stem"] = x
641
+ for name, stage in zip(self.stage_names, self.stages):
642
+ x = stage(x)
643
+ if name in self._out_features:
644
+ outputs[name] = x
645
+ if self.num_classes is not None:
646
+ x = self.avgpool(x)
647
+ x = torch.flatten(x, 1)
648
+ x = self.linear(x)
649
+ if "linear" in self._out_features:
650
+ outputs["linear"] = x
651
+ return outputs
652
+
653
+ def freeze(self, freeze_at=0):
654
+ """
655
+ Freeze the first several stages of the ResNet. Commonly used in
656
+ fine-tuning.
657
+ Layers that produce the same feature map spatial size are defined as one
658
+ "stage" by :paper:`FPN`.
659
+ Args:
660
+ freeze_at (int): number of stages to freeze.
661
+ `1` means freezing the stem. `2` means freezing the stem and
662
+ one residual stage, etc.
663
+ Returns:
664
+ nn.Module: this ResNet itself
665
+ """
666
+ if freeze_at >= 1:
667
+ self.stem.freeze()
668
+ for idx, stage in enumerate(self.stages, start=2):
669
+ if freeze_at >= idx:
670
+ for block in stage.children():
671
+ block.freeze()
672
+ return self
673
+
674
+ @staticmethod
675
+ def make_stage(block_class, num_blocks, *, in_channels, out_channels, **kwargs):
676
+ """
677
+ Create a list of blocks of the same type that forms one ResNet stage.
678
+ Args:
679
+ block_class (type): a subclass of CNNBlockBase that's used to create all blocks in this
680
+ stage. A module of this type must not change spatial resolution of inputs unless its
681
+ stride != 1.
682
+ num_blocks (int): number of blocks in this stage
683
+ in_channels (int): input channels of the entire stage.
684
+ out_channels (int): output channels of **every block** in the stage.
685
+ kwargs: other arguments passed to the constructor of
686
+ `block_class`. If the argument name is "xx_per_block", the
687
+ argument is a list of values to be passed to each block in the
688
+ stage. Otherwise, the same argument is passed to every block
689
+ in the stage.
690
+ Returns:
691
+ list[CNNBlockBase]: a list of block module.
692
+ Examples:
693
+ ::
694
+ stage = ResNet.make_stage(
695
+ BottleneckBlock, 3, in_channels=16, out_channels=64,
696
+ bottleneck_channels=16, num_groups=1,
697
+ stride_per_block=[2, 1, 1],
698
+ dilations_per_block=[1, 1, 2]
699
+ )
700
+ Usually, layers that produce the same feature map spatial size are defined as one
701
+ "stage" (in :paper:`FPN`). Under such definition, ``stride_per_block[1:]`` should
702
+ all be 1.
703
+ """
704
+ blocks = []
705
+ for i in range(num_blocks):
706
+ curr_kwargs = {}
707
+ for k, v in kwargs.items():
708
+ if k.endswith("_per_block"):
709
+ assert len(v) == num_blocks, (
710
+ f"Argument '{k}' of make_stage should have the "
711
+ f"same length as num_blocks={num_blocks}."
712
+ )
713
+ newk = k[: -len("_per_block")]
714
+ assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!"
715
+ curr_kwargs[newk] = v[i]
716
+ else:
717
+ curr_kwargs[k] = v
718
+
719
+ blocks.append(
720
+ block_class(in_channels=in_channels, out_channels=out_channels, **curr_kwargs)
721
+ )
722
+ in_channels = out_channels
723
+ return blocks
724
+
725
+ @staticmethod
726
+ def make_default_stages(depth, block_class=None, **kwargs):
727
+ """
728
+ Created list of ResNet stages from pre-defined depth (one of 18, 34, 50, 101, 152).
729
+ If it doesn't create the ResNet variant you need, please use :meth:`make_stage`
730
+ instead for fine-grained customization.
731
+ Args:
732
+ depth (int): depth of ResNet
733
+ block_class (type): the CNN block class. Has to accept
734
+ `bottleneck_channels` argument for depth > 50.
735
+ By default it is BasicBlock or BottleneckBlock, based on the
736
+ depth.
737
+ kwargs:
738
+ other arguments to pass to `make_stage`. Should not contain
739
+ stride and channels, as they are predefined for each depth.
740
+ Returns:
741
+ list[list[CNNBlockBase]]: modules in all stages; see arguments of
742
+ :class:`ResNet.__init__`.
743
+ """
744
+ num_blocks_per_stage = {
745
+ 18: [2, 2, 2, 2],
746
+ 34: [3, 4, 6, 3],
747
+ 50: [3, 4, 6, 3],
748
+ 101: [3, 4, 23, 3],
749
+ 152: [3, 8, 36, 3],
750
+ }[depth]
751
+ if block_class is None:
752
+ block_class = BasicBlock if depth < 50 else BottleneckBlock
753
+ if depth < 50:
754
+ in_channels = [64, 64, 128, 256]
755
+ out_channels = [64, 128, 256, 512]
756
+ else:
757
+ in_channels = [64, 256, 512, 1024]
758
+ out_channels = [256, 512, 1024, 2048]
759
+ ret = []
760
+ for (n, s, i, o) in zip(num_blocks_per_stage, [1, 2, 2, 2], in_channels, out_channels):
761
+ if depth >= 50:
762
+ kwargs["bottleneck_channels"] = o // 4
763
+ ret.append(
764
+ ResNet.make_stage(
765
+ block_class=block_class,
766
+ num_blocks=n,
767
+ stride_per_block=[s] + [1] * (n - 1),
768
+ in_channels=i,
769
+ out_channels=o,
770
+ **kwargs,
771
+ )
772
+ )
773
+ return ret
774
+
775
+ class DummyAggregationNetwork(nn.Module): # for testing, return the input
776
+ def __init__(self):
777
+ super(DummyAggregationNetwork, self).__init__()
778
+ # dummy parameter
779
+ self.dummy = nn.Parameter(torch.ones([]))
780
+
781
+ def forward(self, batch, pose=None):
782
+ return batch * self.dummy
783
+
784
+
785
+ class AggregationNetwork(nn.Module):
786
+ """
787
+ Module for aggregating feature maps across time and space.
788
+ Design inspired by the Feature Extractor from ODISE (Xu et. al., CVPR 2023).
789
+ https://github.com/NVlabs/ODISE/blob/5836c0adfcd8d7fd1f8016ff5604d4a31dd3b145/odise/modeling/backbone/feature_extractor.py
790
+ """
791
+
792
+ def __init__(
793
+ self,
794
+ device,
795
+ feature_dims=[640, 1280, 1280, 768],
796
+ projection_dim=384,
797
+ num_norm_groups=32,
798
+ save_timestep=[1],
799
+ kernel_size=[1, 3, 1],
800
+ contrastive_temp=10,
801
+ feat_map_dropout=0.0,
802
+ ):
803
+ super().__init__()
804
+ self.skip_connection = True
805
+ self.feat_map_dropout = feat_map_dropout
806
+ self.azimuth_embedding = None
807
+ self.pos_embedding = None
808
+ self.bottleneck_layers = nn.ModuleList()
809
+ self.feature_dims = feature_dims
810
+ # For CLIP symmetric cross entropy loss during training
811
+ self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
812
+ self.self_logit_scale = nn.Parameter(torch.ones([]) * np.log(contrastive_temp))
813
+ self.device = device
814
+ self.save_timestep = save_timestep
815
+
816
+ self.mixing_weights_names = []
817
+ for l, feature_dim in enumerate(self.feature_dims):
818
+ bottleneck_layer = nn.Sequential(
819
+ *ResNet.make_stage(
820
+ BottleneckBlock,
821
+ num_blocks=1,
822
+ in_channels=feature_dim,
823
+ bottleneck_channels=projection_dim // 4,
824
+ out_channels=projection_dim,
825
+ norm="GN",
826
+ num_norm_groups=num_norm_groups,
827
+ kernel_size=kernel_size
828
+ )
829
+ )
830
+ self.bottleneck_layers.append(bottleneck_layer)
831
+ for t in save_timestep:
832
+ # 1-index the layer name following prior work
833
+ self.mixing_weights_names.append(f"timestep-{save_timestep}_layer-{l + 1}")
834
+ self.last_layer = None
835
+ self.bottleneck_layers = self.bottleneck_layers.to(device)
836
+ mixing_weights = torch.ones(len(self.bottleneck_layers) * len(save_timestep))
837
+ self.mixing_weights = nn.Parameter(mixing_weights.to(device))
838
+ # count number of parameters
839
+ num_params = 0
840
+ for param in self.parameters():
841
+ num_params += param.numel()
842
+ print(f"AggregationNetwork has {num_params} parameters.")
843
+
844
+ def load_pretrained_weights(self, pretrained_dict):
845
+ custom_dict = self.state_dict()
846
+
847
+ # Handle size mismatch
848
+ if 'mixing_weights' in custom_dict and 'mixing_weights' in pretrained_dict and custom_dict[
849
+ 'mixing_weights'].shape != pretrained_dict['mixing_weights'].shape:
850
+ # Keep the first four weights from the pretrained model, and randomly initialize the fifth weight
851
+ custom_dict['mixing_weights'][:4] = pretrained_dict['mixing_weights'][:4]
852
+ custom_dict['mixing_weights'][4] = torch.zeros_like(custom_dict['mixing_weights'][4])
853
+ else:
854
+ custom_dict['mixing_weights'][:4] = pretrained_dict['mixing_weights'][:4]
855
+
856
+ # Load the weights that do match
857
+ matching_keys = {k: v for k, v in pretrained_dict.items() if k in custom_dict and k != 'mixing_weights'}
858
+ custom_dict.update(matching_keys)
859
+
860
+ # Now load the updated state_dict
861
+ self.load_state_dict(custom_dict, strict=False)
862
+
863
+ def forward(self, batch, pose=None):
864
+ """
865
+ Assumes batch is shape (B, C, H, W) where C is the concatenation of all layer features.
866
+ """
867
+ if self.feat_map_dropout > 0 and self.training:
868
+ batch = F.dropout(batch, p=self.feat_map_dropout)
869
+
870
+ output_feature = None
871
+ start = 0
872
+ mixing_weights = torch.nn.functional.softmax(self.mixing_weights, dim=0)
873
+ if self.pos_embedding is not None: # position embedding
874
+ batch = torch.cat((batch, self.pos_embedding), dim=1)
875
+ for i in range(len(mixing_weights)):
876
+ # Share bottleneck layers across timesteps
877
+ bottleneck_layer = self.bottleneck_layers[i % len(self.feature_dims)]
878
+ # Chunk the batch according the layer
879
+ # Account for looping if there are multiple timesteps
880
+ end = start + self.feature_dims[i % len(self.feature_dims)]
881
+ feats = batch[:, start:end, :, :]
882
+ start = end
883
+ # Downsample the number of channels and weight the layer
884
+ bottlenecked_feature = bottleneck_layer(feats)
885
+ bottlenecked_feature = mixing_weights[i] * bottlenecked_feature
886
+ if output_feature is None:
887
+ output_feature = bottlenecked_feature
888
+ else:
889
+ output_feature += bottlenecked_feature
890
+
891
+ if self.last_layer is not None:
892
+
893
+ output_feature_after = self.last_layer(output_feature)
894
+ if self.skip_connection:
895
+ # skip connection
896
+ output_feature = output_feature + output_feature_after
897
+ return output_feature
898
+
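+ # Usage sketch (assumed shapes): `batch` is the channel-wise concatenation of
+ # the per-layer features, C = sum(feature_dims); the output is a single
+ # [B, projection_dim, H, W] map formed as a learned softmax-weighted sum of
+ # the bottlenecked layer features.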
899
+
900
+ def conv1x1(in_planes, out_planes, stride=1):
901
+ """1x1 convolution without padding"""
902
+ return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False)
903
+
904
+
905
+ def conv3x3(in_planes, out_planes, stride=1):
906
+ """3x3 convolution with padding"""
907
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
908
+
909
+
910
+ class BasicBlock(nn.Module):
911
+ def __init__(self, in_planes, planes, stride=1):
912
+ super().__init__()
913
+ self.conv1 = conv3x3(in_planes, planes, stride)
914
+ self.conv2 = conv3x3(planes, planes)
915
+ self.bn1 = nn.BatchNorm2d(planes)
916
+ self.bn2 = nn.BatchNorm2d(planes)
917
+ self.relu = nn.ReLU(inplace=True)
918
+
919
+ if stride == 1:
920
+ self.downsample = None
921
+ else:
922
+ self.downsample = nn.Sequential(
923
+ conv1x1(in_planes, planes, stride=stride),
924
+ nn.BatchNorm2d(planes)
925
+ )
926
+
927
+ def forward(self, x):
928
+ y = x
929
+ y = self.relu(self.bn1(self.conv1(y)))
930
+ y = self.bn2(self.conv2(y))
931
+
932
+ if self.downsample is not None:
933
+ x = self.downsample(x)
934
+
935
+ return self.relu(x + y)
EdgeCape/models/backbones/dino.py ADDED
@@ -0,0 +1,206 @@
1
+ import einops as E
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from transformers.models.vit_mae.modeling_vit_mae import (
6
+ get_2d_sincos_pos_embed_from_grid,
7
+ )
8
+
9
+
10
+ def resize_pos_embed(
11
+ pos_embed: torch.Tensor, hw: tuple[int, int], has_cls_token: bool = True
12
+ ):
13
+ """
14
+ Resize positional embedding for arbitrary image resolution. Resizing is done
15
+ via bicubic interpolation.
16
+
17
+ Args:
18
+ pos_embed: Positional embedding tensor of shape ``(n_patches, embed_dim)``.
19
+ hw: Target height and width of the tensor after interpolation.
20
+ has_cls_token: Whether ``pos_embed[0]`` is for the ``[cls]`` token.
21
+
22
+ Returns:
23
+ Tensor of shape ``(new_n_patches, embed_dim)`` of resized embedding.
24
+ ``new_n_patches`` is ``new_height * new_width`` if ``has_cls`` is False,
25
+ else ``1 + new_height * new_width``.
26
+ """
27
+
28
+ n_grid = pos_embed.shape[0] - 1 if has_cls_token else pos_embed.shape[0]
29
+
30
+ # Do not resize if already in same shape.
31
+ if n_grid == hw[0] * hw[1]:
32
+ return pos_embed
33
+
34
+ # Get original position embedding and extract ``[cls]`` token.
35
+ if has_cls_token:
36
+ cls_embed, pos_embed = pos_embed[[0]], pos_embed[1:]
37
+
38
+ orig_dim = int(pos_embed.shape[0] ** 0.5)
39
+
40
+ pos_embed = E.rearrange(pos_embed, "(h w) c -> 1 c h w", h=orig_dim)
41
+ pos_embed = F.interpolate(
42
+ pos_embed, hw, mode="bicubic", align_corners=False, antialias=True
43
+ )
44
+ pos_embed = E.rearrange(pos_embed, "1 c h w -> (h w) c")
45
+
46
+ # Add embedding of ``[cls]`` token back after resizing.
47
+ if has_cls_token:
48
+ pos_embed = torch.cat([cls_embed, pos_embed], dim=0)
49
+
50
+ return pos_embed
51
+
52
+
53
+ def center_padding(images, patch_size):
54
+ _, _, h, w = images.shape
55
+ diff_h = h % patch_size
56
+ diff_w = w % patch_size
57
+
58
+ if diff_h == 0 and diff_w == 0:
59
+ return images
60
+
61
+ pad_h = patch_size - diff_h
62
+ pad_w = patch_size - diff_w
63
+
64
+ pad_t = pad_h // 2
65
+ pad_l = pad_w // 2
66
+ pad_r = pad_w - pad_l
67
+ pad_b = pad_h - pad_t
68
+
69
+ images = F.pad(images, (pad_l, pad_r, pad_t, pad_b))
70
+ return images
71
+
72
+
73
+ def get_2d_sincos_pos_embed(embed_dim, grid_size, add_cls_token=False):
74
+ """
75
+ COPIED FROM TRANSFORMERS PACKAGE AND EDITED TO ALLOW FOR DIFFERENT WIDTH-HEIGHT
76
+ Create 2D sin/cos positional embeddings.
77
+
78
+ Args:
79
+ embed_dim (`int`):
80
+ Embedding dimension.
81
+ grid_size (`int`):
82
+ The grid height and width.
83
+ add_cls_token (`bool`, *optional*, defaults to `False`):
84
+ Whether or not to add a classification (CLS) token.
85
+
86
+ Returns:
87
+ (`torch.FloatTensor` of shape (grid_size*grid_size, embed_dim) or
88
+ (1+grid_size*grid_size, embed_dim): the
89
+ position embeddings (with or without classification token)
90
+ """
91
+ grid_h = np.arange(grid_size[0], dtype=np.float32)
92
+ grid_w = np.arange(grid_size[1], dtype=np.float32)
93
+ grid = np.meshgrid(grid_w, grid_h) # here w goes first
94
+ grid = np.stack(grid, axis=0)
95
+
96
+ grid = grid.reshape([2, 1, grid_size[0], grid_size[1]])
97
+ pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
98
+ if add_cls_token:
99
+ pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
100
+ return pos_embed
101
+
102
+
103
+ def tokens_to_output(output_type, dense_tokens, cls_token, feat_hw):
104
+ if output_type == "cls":
105
+ assert cls_token is not None
106
+ output = cls_token
107
+ elif output_type == "gap":
108
+ output = dense_tokens.mean(dim=1)
109
+ elif output_type == "dense":
110
+ h, w = feat_hw
111
+ dense_tokens = E.rearrange(dense_tokens, "b (h w) c -> b c h w", h=h, w=w)
112
+ output = dense_tokens.contiguous()
113
+ elif output_type == "dense-cls":
114
+ assert cls_token is not None
115
+ h, w = feat_hw
116
+ dense_tokens = E.rearrange(dense_tokens, "b (h w) c -> b c h w", h=h, w=w)
117
+ cls_token = cls_token[:, :, None, None].repeat(1, 1, h, w)
118
+ output = torch.cat((dense_tokens, cls_token), dim=1).contiguous()
119
+ else:
120
+ raise ValueError()
121
+
122
+ return output
123
+
124
+ class DINO(torch.nn.Module):
125
+ def __init__(
126
+ self,
127
+ dino_name="dinov2",
128
+ model_name="vits14",
129
+ output="dense-cls",
130
+ layer=-1,
131
+ return_multilayer=True,
132
+ ):
133
+ super().__init__()
134
+ feat_dims = {
135
+ "vits14": 384,
136
+ "vitb8": 768,
137
+ "vitb16": 768,
138
+ "vitb14": 768,
139
+ "vitb14_reg": 768,
140
+ "vitl14": 1024,
141
+ "vitg14": 1536,
142
+ }
143
+
144
+ # get model
145
+ self.model_name = dino_name
146
+ self.checkpoint_name = f"{dino_name}_{model_name}"
147
+ dino_vit = torch.hub.load(f"facebookresearch/{dino_name}", self.checkpoint_name)
148
+ self.vit = dino_vit.eval().to(torch.float32)
149
+ self.has_registers = "_reg" in model_name
150
+
151
+ assert output in ["cls", "gap", "dense", "dense-cls"]
152
+ self.output = output
153
+ self.patch_size = self.vit.patch_embed.proj.kernel_size[0]
154
+
155
+ feat_dim = feat_dims[model_name]
156
+ feat_dim = feat_dim * 2 if output == "dense-cls" else feat_dim
157
+
158
+ num_layers = len(self.vit.blocks)
159
+ multilayers = [
160
+ num_layers // 4 - 1,
161
+ num_layers // 2 - 1,
162
+ num_layers // 4 * 3 - 1,
163
+ num_layers - 1,
164
+ ]
165
+
166
+ if return_multilayer:
167
+ self.feat_dim = [feat_dim, feat_dim, feat_dim, feat_dim]
168
+ self.multilayers = multilayers
169
+ else:
170
+ self.feat_dim = feat_dim
171
+ layer = multilayers[-1] if layer == -1 else layer
172
+ self.multilayers = [layer]
173
+
174
+ # define layer name (for logging)
175
+ self.layer = "-".join(str(_x) for _x in self.multilayers)
176
+
177
+ def forward(self, images):
178
+
179
+ # pad images (if needed) to ensure it matches patch_size
180
+ images = center_padding(images, self.patch_size)
181
+ h, w = images.shape[-2:]
182
+ h, w = h // self.patch_size, w // self.patch_size
183
+
184
+ if self.model_name == "dinov2":
185
+ x = self.vit.prepare_tokens_with_masks(images, None)
186
+ else:
187
+ x = self.vit.prepare_tokens(images)
188
+
189
+ embeds = []
190
+ for i, blk in enumerate(self.vit.blocks):
191
+ x = blk(x)
192
+ if i in self.multilayers:
193
+ embeds.append(x)
194
+ if len(embeds) == len(self.multilayers):
195
+ break
196
+
197
+ num_spatial = h * w
198
+ outputs = []
199
+ for i, x_i in enumerate(embeds):
200
+ cls_tok = x_i[:, 0]
201
+ # ignoring register tokens
202
+ spatial = x_i[:, -1 * num_spatial :]
203
+ x_i = tokens_to_output(self.output, spatial, cls_tok, (h, w))
204
+ outputs.append(x_i)
205
+
206
+ return outputs[0] if len(outputs) == 1 else outputs
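+ # Usage sketch (assumed input size): for a 224 x 224 image batch, DINO with
+ # model_name='vits14', output='dense-cls' and return_multilayer=True returns
+ # four tensors of shape [B, 768, 16, 16] (384 dense channels + 384 CLS channels).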
EdgeCape/models/detectors/EdgeCape.py ADDED
@@ -0,0 +1,392 @@
1
+ import math
2
+ import cv2
3
+ import mmcv
4
+ import numpy as np
5
+ import torch
6
+ import torch.nn
7
+ import torch.nn.functional as F
8
+ from mmcv.image import imwrite
9
+ from mmcv.visualization.image import imshow
10
+ from mmpose.models import builder
11
+ from mmpose.models.builder import POSENETS
12
+ from mmpose.models.detectors.base import BasePose
13
+ from EdgeCape.models.backbones.adapter import DPT
14
+ from EdgeCape.models.backbones.dino import DINO
15
+
16
+
17
+ @POSENETS.register_module()
18
+ class EdgeCape(BasePose):
19
+ """
20
+ EdgeCape: edge-aware model for Category-Agnostic Pose Estimation (CAPE).
21
+ Args:
22
+ keypoint_head (dict): Config for keypoint head.
23
+ encoder_config (dict): Config for encoder.
24
+ train_cfg (dict): Config for training. Default: None.
25
+ test_cfg (dict): Config for testing. Default: None.
26
+ freeze_backbone (bool): If True, freeze backbone. Default: False.
27
+ """
28
+
29
+ def __init__(self,
30
+ keypoint_head,
31
+ encoder_config,
32
+ train_cfg=None,
33
+ test_cfg=None,
34
+ freeze_backbone=False):
35
+ super().__init__()
36
+ feature_output_setting = encoder_config.get('output', 'dense-cls')
37
+ model_name = encoder_config.get('model_name', 'vits14')
38
+ self.encoder_sample = self.encoder_query = DINO(output=feature_output_setting, model_name=model_name)
39
+ self.probe = DPT(input_dims=self.encoder_query.feat_dim, output_dim=768)
40
+ self.backbone = 'dino_extractor'
41
+ self.freeze_backbone = freeze_backbone
42
+ if keypoint_head.get('freeze', None) is not None:
43
+ self.freeze_backbone = True
44
+
45
+ self.keypoint_head_module = builder.build_head(keypoint_head)
46
+ self.keypoint_head_module.init_weights()
47
+
48
+ self.train_cfg = train_cfg
49
+ self.test_cfg = test_cfg
50
+ self.target_type = test_cfg.get('target_type',
51
+ 'GaussianHeatMap') # GaussianHeatMap
52
+
53
+ @property
54
+ def with_keypoint(self):
55
+ """Check if has keypoint_head."""
56
+ return hasattr(self, 'keypoint_head_module')
57
+
58
+ def init_weights(self, pretrained=None):
59
+ """Weight initialization for model."""
60
+ self.encoder_sample.init_weights(pretrained)
61
+ self.encoder_query.init_weights(pretrained)
62
+ self.keypoint_head_module.init_weights()
63
+
64
+ def forward(self,
65
+ img_s,
66
+ img_q,
67
+ target_s=None,
68
+ target_weight_s=None,
69
+ target_q=None,
70
+ target_weight_q=None,
71
+ img_metas=None,
72
+ return_loss=True,
73
+ **kwargs):
74
+ """Calls either forward_train or forward_test depending on whether
75
+ return_loss=True. Note this setting will change the expected inputs.
76
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
77
+ Tensor and List[dict]), and when `return_loss=False`, img and img_meta
78
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
79
+ the outer list indicating test time augmentations.
80
+ """
81
+ if return_loss:
82
+ return self.forward_train(img_s, target_s, target_weight_s, img_q,
83
+ target_q, target_weight_q, img_metas,
84
+ **kwargs)
85
+ else:
86
+ return self.forward_test(img_s, target_s, target_weight_s, img_q,
87
+ target_q, target_weight_q, img_metas,
88
+ **kwargs)
89
+
90
+ def forward_train(self,
91
+ img_s,
92
+ target_s,
93
+ target_weight_s,
94
+ img_q,
95
+ target_q,
96
+ target_weight_q,
97
+ img_metas,
98
+ **kwargs):
99
+ """Defines the computation performed at every call when training."""
100
+ bs, _, h, w = img_q.shape
101
+ random_mask = kwargs.get('rand_mask', None)
102
+ output, initial_proposals, similarity_map, mask_s, reconstructed_keypoints = self.predict(img_s,
103
+ target_s,
104
+ target_weight_s,
105
+ img_q,
106
+ img_metas,
107
+ random_mask)
108
+
109
+ # parse the img meta to get the target keypoints
110
+ device = output.device
111
+ target_keypoints = self.parse_keypoints_from_img_meta(img_metas,
112
+ device,
113
+ keyword='query')
114
+
115
+ target_sizes = torch.tensor(
116
+ [img_q.shape[-2], img_q.shape[-1]]).unsqueeze(0).repeat(
117
+ img_q.shape[0], 1, 1)
118
+
119
+ losses = dict()
120
+ if self.with_keypoint:
121
+ keypoint_losses = self.keypoint_head_module.get_loss(output,
122
+ initial_proposals,
123
+ similarity_map,
124
+ target_keypoints,
125
+ target_q,
126
+ target_weight_q * mask_s,
127
+ target_sizes,
128
+ reconstructed_keypoints,
129
+ )
130
+ losses.update(keypoint_losses)
131
+ keypoint_accuracy = self.keypoint_head_module.get_accuracy(output[-1],
132
+ target_keypoints,
133
+ target_weight_q * mask_s,
134
+ target_sizes,
135
+ height=h)
136
+ losses.update(keypoint_accuracy)
137
+ return losses
138
+
139
+ def forward_test(self,
140
+ img_s,
141
+ target_s,
142
+ target_weight_s,
143
+ img_q,
144
+ target_q,
145
+ target_weight_q,
146
+ img_metas=None,
147
+ vis_offset=True,
148
+ **kwargs):
149
+
150
+ """Defines the computation performed at every call when testing."""
151
+ batch_size, _, img_height, img_width = img_q.shape
152
+ output, initial_proposals, similarity_map, mask_s, reconstructed_keypoints = self.predict(img_s,
153
+ target_s,
154
+ target_weight_s,
155
+ img_q,
156
+ img_metas
157
+ )
158
+ predicted_pose = output[-1].detach().cpu().numpy()
159
+ result = {}
160
+
161
+ if self.with_keypoint:
162
+ keypoint_result = self.keypoint_head_module.decode(img_metas, predicted_pose, img_size=[img_width, img_height])
163
+ result.update(keypoint_result)
164
+
165
+ if vis_offset:
166
+ result.update({"points": torch.cat((initial_proposals[None], output)).cpu().numpy()})
167
+
168
+ result.update({"sample_image_file": [img_metas[i]['sample_image_file'] for i in range(len(img_metas))]})
169
+
170
+ return result
171
+
172
+     def predict(self,
+                 img_s,
+                 target_s,
+                 target_weight_s,
+                 img_q,
+                 img_metas=None,
+                 random_mask=None):
+         batch_size, _, img_height, img_width = img_q.shape
+         # NOTE: a non-empty list comprehension is always truthy, so this assert is a no-op as written.
+         assert [i['sample_skeleton'][0] != i['query_skeleton'] for i in img_metas]
+         mask_s = target_weight_s[0]
+         for target_weight in target_weight_s:
+             mask_s = mask_s * target_weight
+         feature_q, feature_s = self.extract_features(img_s, img_q)
+         skeleton_lst = [i['sample_skeleton'][0] for i in img_metas]
+
+         (output, initial_proposals, similarity_map, reconstructed_keypoints) = self.keypoint_head_module(
+             feature_q, feature_s, target_s, mask_s, skeleton_lst, random_mask=random_mask)
+
+         return output, initial_proposals, similarity_map, mask_s, reconstructed_keypoints
+
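# Numeric sketch of the support-mask reduction above: mask_s keeps only the
# keypoints that are valid in *every* support shot, so a keypoint missing from
# any shot is excluded from the head and from the training loss (values below
# are illustrative).
import torch

target_weight_s = [torch.tensor([[[1.], [1.], [0.]]]),   # shot 1: keypoint 2 invalid
                   torch.tensor([[[1.], [0.], [1.]]])]   # shot 2: keypoint 1 invalid
mask_s = target_weight_s[0]
for target_weight in target_weight_s:
    mask_s = mask_s * target_weight
print(mask_s.squeeze(-1))  # tensor([[1., 0., 0.]]) -> only keypoint 0 survives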
+     def extract_features(self, img_s, img_q):
+         with torch.no_grad():
+             dino_feature_s = [self.encoder_sample(img) for img in img_s]
+             dino_feature_q = self.encoder_query(img_q)  # [bs, 3, h, w]
+         if self.freeze_backbone:
+             with torch.no_grad():
+                 feature_s = [self.probe(f) for f in dino_feature_s]
+                 feature_q = self.probe(dino_feature_q)
+         else:
+             feature_s = [self.probe(f) for f in dino_feature_s]
+             feature_q = self.probe(dino_feature_q)
+
+         return feature_q, feature_s
+
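# Sketch of the gradient behaviour implied above: the DINO encoders always run
# under no_grad, so only the probe can receive gradients, and even that path is
# disabled when freeze_backbone is True. Toy modules stand in for the real
# encoder/probe, which are defined elsewhere in the repo.
import torch
import torch.nn as nn

encoder = nn.Conv2d(3, 8, 3, padding=1)   # stand-in for the frozen DINO encoder
probe = nn.Conv2d(8, 8, 1)                # stand-in for self.probe

img_q = torch.randn(1, 3, 32, 32)
with torch.no_grad():
    dino_feature_q = encoder(img_q)
feature_q = probe(dino_feature_q)
print(dino_feature_q.requires_grad, feature_q.requires_grad)  # False True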
+     def parse_keypoints_from_img_meta(self, img_meta, device, keyword='query'):
+         """Parse keypoints from the img_meta.
+
+         Args:
+             img_meta (list[dict]): Image meta info.
+             device (torch.device): Device of the output keypoints.
+             keyword (str): 'query' or 'sample'. Default: 'query'.
+
+         Returns:
+             Tensor: Keypoint coordinates of the query (or sample) images.
+         """
+         if keyword == 'query':
+             query_kpt = torch.stack([
+                 torch.tensor(info[f'{keyword}_joints_3d']).to(device)
+                 for info in img_meta], dim=0)[:, :, :2]
+         else:
+             query_kpt = []
+             for info in img_meta:
+                 if isinstance(info[f'{keyword}_joints_3d'][0], torch.Tensor):
+                     samples = torch.stack(info[f'{keyword}_joints_3d'])
+                 else:
+                     samples = np.array(info[f'{keyword}_joints_3d'])
+                 query_kpt.append(torch.tensor(samples).to(device)[:, :, :2])
+             query_kpt = torch.stack(query_kpt, dim=0)  # [bs, num_samples, num_query, 2]
+         return query_kpt
+
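# Shape sketch for the 'query' branch above: each img_meta entry carries a
# (num_kpts, 3) 'query_joints_3d' array; stacking a batch of two and keeping
# only the x, y columns yields (2, num_kpts, 2). Values are illustrative.
import numpy as np
import torch

img_meta = [{'query_joints_3d': np.zeros((17, 3))} for _ in range(2)]
query_kpt = torch.stack(
    [torch.tensor(m['query_joints_3d']) for m in img_meta], dim=0)[:, :, :2]
print(query_kpt.shape)  # torch.Size([2, 17, 2])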
+     def get_full_similarity_map(self, feature_q, feature_s, h, w):
+         resized_feature_q = F.interpolate(feature_q, size=(h, w), mode='bilinear')
+         resized_feature_s = [F.interpolate(s, size=(h, w), mode='bilinear')
+                              for s in feature_s]
+         return [self.chunk_cosine_sim(f_s, resized_feature_q)
+                 for f_s in resized_feature_s]
+
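# chunk_cosine_sim is defined elsewhere in the repo; the helper below is a
# minimal sketch of the kind of map it plausibly produces (all names and the
# output layout here are assumptions, not the repo's implementation):
# entry (i, j) is the cosine similarity between spatial location i of feat_a
# and location j of feat_b.
import torch
import torch.nn.functional as F

def cosine_similarity_map(feat_a, feat_b):
    # feat_a, feat_b: [bs, C, h, w] -> similarity: [bs, h*w, h*w]
    a = F.normalize(feat_a.flatten(2), dim=1)   # [bs, C, h*w], unit-norm channels
    b = F.normalize(feat_b.flatten(2), dim=1)
    return torch.einsum('bci,bcj->bij', a, b)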
+     # UNMODIFIED
+     def show_result(self,
+                     img,
+                     result,
+                     skeleton=None,
+                     kpt_score_thr=0.3,
+                     bbox_color='green',
+                     pose_kpt_color=None,
+                     pose_limb_color=None,
+                     radius=4,
+                     text_color=(255, 0, 0),
+                     thickness=1,
+                     font_scale=0.5,
+                     win_name='',
+                     show=False,
+                     wait_time=0,
+                     out_file=None):
+         """Draw `result` over `img`.
+
+         Args:
+             img (str or Tensor): The image to be displayed.
+             result (list[dict]): The results to draw over `img`
+                 (bbox_result, pose_result).
+             skeleton (list[list[int]]): Pairs of 1-based keypoint indices
+                 defining the limbs. If None, do not draw limbs.
+             kpt_score_thr (float, optional): Minimum score of keypoints
+                 to be shown. Default: 0.3.
+             bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+             pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+                 If None, do not draw keypoints.
+             pose_limb_color (np.array[Mx3]): Color of M limbs.
+                 If None, do not draw limbs.
+             radius (int): Radius of the keypoint circles.
+             text_color (str or tuple or :obj:`Color`): Color of texts.
+             thickness (int): Thickness of lines.
+             font_scale (float): Font scale of texts.
+             win_name (str): The window name.
+             show (bool): Whether to show the image. Default: False.
+             wait_time (int): Value of waitKey param. Default: 0.
+             out_file (str or None): The filename to write the image.
+                 Default: None.
+
+         Returns:
+             ndarray: Visualized img, only if not `show` or `out_file`.
+         """
+         img = mmcv.imread(img)
+         img = img.copy()
+         img_h, img_w, _ = img.shape
+
+         bbox_result = []
+         pose_result = []
+         for res in result:
+             bbox_result.append(res['bbox'])
+             pose_result.append(res['keypoints'])
+
+         if len(bbox_result) > 0:
+             bboxes = np.vstack(bbox_result)
+             # draw bounding boxes
+             mmcv.imshow_bboxes(
+                 img,
+                 bboxes,
+                 colors=bbox_color,
+                 top_k=-1,
+                 thickness=thickness,
+                 show=False,
+                 win_name=win_name,
+                 wait_time=wait_time,
+                 out_file=None)
+
+             for person_id, kpts in enumerate(pose_result):
+                 # draw each point on image
+                 if pose_kpt_color is not None:
+                     assert len(pose_kpt_color) == len(kpts), (
+                         len(pose_kpt_color), len(kpts))
+                     for kid, kpt in enumerate(kpts):
+                         x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2]
+                         if kpt_score > kpt_score_thr:
+                             img_copy = img.copy()
+                             r, g, b = pose_kpt_color[kid]
+                             cv2.circle(img_copy, (int(x_coord), int(y_coord)),
+                                        radius, (int(r), int(g), int(b)), -1)
+                             transparency = max(0, min(1, kpt_score))
+                             cv2.addWeighted(
+                                 img_copy,
+                                 transparency,
+                                 img,
+                                 1 - transparency,
+                                 0,
+                                 dst=img)
+
+                 # draw limbs
+                 if skeleton is not None and pose_limb_color is not None:
+                     assert len(pose_limb_color) == len(skeleton)
+                     for sk_id, sk in enumerate(skeleton):
+                         pos1 = (int(kpts[sk[0] - 1, 0]), int(kpts[sk[0] - 1, 1]))
+                         pos2 = (int(kpts[sk[1] - 1, 0]), int(kpts[sk[1] - 1, 1]))
+                         if (pos1[0] > 0 and pos1[0] < img_w and pos1[1] > 0
+                                 and pos1[1] < img_h and pos2[0] > 0
+                                 and pos2[0] < img_w and pos2[1] > 0
+                                 and pos2[1] < img_h
+                                 and kpts[sk[0] - 1, 2] > kpt_score_thr
+                                 and kpts[sk[1] - 1, 2] > kpt_score_thr):
+                             img_copy = img.copy()
+                             X = (pos1[0], pos2[0])
+                             Y = (pos1[1], pos2[1])
+                             mX = np.mean(X)
+                             mY = np.mean(Y)
+                             length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5
+                             angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
+                             stickwidth = 2
+                             polygon = cv2.ellipse2Poly(
+                                 (int(mX), int(mY)),
+                                 (int(length / 2), int(stickwidth)), int(angle),
+                                 0, 360, 1)
+
+                             r, g, b = pose_limb_color[sk_id]
+                             cv2.fillConvexPoly(img_copy, polygon,
+                                                (int(r), int(g), int(b)))
+                             transparency = max(
+                                 0, min(1, 0.5 * (kpts[sk[0] - 1, 2] + kpts[sk[1] - 1, 2])))
+                             cv2.addWeighted(
+                                 img_copy,
+                                 transparency,
+                                 img,
+                                 1 - transparency,
+                                 0,
+                                 dst=img)
+
+         show, wait_time = 1, 1  # NOTE: overrides the `show`/`wait_time` arguments, forcing display
+         if show:
+             height, width = img.shape[:2]
+             max_ = max(height, width)
+
+             factor = min(1, 800 / max_)
+             enlarge = cv2.resize(
+                 img, (0, 0),
+                 fx=factor,
+                 fy=factor,
+                 interpolation=cv2.INTER_CUBIC)
+             imshow(enlarge, win_name, wait_time)
+
+         if out_file is not None:
+             imwrite(img, out_file)
+
+         return img
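# Usage sketch for show_result, assuming a constructed EdgeCape instance
# `detector` (hypothetical name): each entry of `result` needs a 'bbox' row
# that np.vstack can stack and a (K, 3) 'keypoints' array of (x, y, score),
# as read by the drawing loops above. Values are illustrative.
import numpy as np

result = [{
    'bbox': np.array([[20., 30., 180., 220., 0.9]]),
    'keypoints': np.array([[50., 60., 0.95],
                           [90., 75., 0.80]]),
}]
# detector.show_result('query.jpg', result,
#                      pose_kpt_color=np.array([[255, 0, 0], [0, 255, 0]]),
#                      out_file='vis.jpg')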
EdgeCape/models/detectors/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .EdgeCape import EdgeCape
+
+ __all__ = ['EdgeCape']
EdgeCape/models/detectors/__pycache__/EdgeCape.cpython-39.pyc ADDED
Binary file (11.6 kB).