Hugging Face Space: Sapiens Pose (running on ZeroGPU)
Commit 28c256d · Add initial commit (no parent commits)
This view is limited to 50 files because the commit contains too many changes.
- .gitattributes +38 -0
- .gitignore +1 -0
- NOTES.md +11 -0
- README.md +13 -0
- app.py +453 -0
- assets/checkpoints/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth +3 -0
- assets/checkpoints/sapiens_0.3b_goliath_best_goliath_AP_575_torchscript.pt2 +3 -0
- assets/checkpoints/sapiens_1b_goliath_best_goliath_AP_640_torchscript.pt2 +3 -0
- assets/images/68204.png +3 -0
- assets/images/68210.png +3 -0
- assets/images/68658.png +3 -0
- assets/images/68666.png +3 -0
- assets/images/68691.png +3 -0
- assets/images/68956.png +3 -0
- assets/images/pexels-amresh444-17315601.png +3 -0
- assets/images/pexels-gabby-k-6311686.png +3 -0
- assets/images/pexels-julia-m-cameron-4145040.png +3 -0
- assets/images/pexels-marcus-aurelius-6787357.png +3 -0
- assets/images/pexels-mo-saeed-3616599-5409085.png +3 -0
- assets/images/pexels-riedelmax-27355495.png +3 -0
- assets/images/pexels-sergeymakashin-5368660.png +3 -0
- assets/images/pexels-vinicius-wiesehofer-289347-4219918.png +3 -0
- assets/rtmdet_m_640-8xb32_coco-person_no_nms.py +20 -0
- build_wheel.py +26 -0
- classes_and_palettes.py +1024 -0
- detector_utils.py +196 -0
- external/cv/.gitignore +125 -0
- external/cv/MANIFEST.in +6 -0
- external/cv/dist/sapiens_cv-1.0.0-cp310-cp310-linux_x86_64.whl +3 -0
- external/cv/mmcv/__init__.py +18 -0
- external/cv/mmcv/arraymisc/__init__.py +9 -0
- external/cv/mmcv/arraymisc/quantization.py +70 -0
- external/cv/mmcv/cnn/__init__.py +33 -0
- external/cv/mmcv/cnn/alexnet.py +68 -0
- external/cv/mmcv/cnn/bricks/__init__.py +37 -0
- external/cv/mmcv/cnn/bricks/activation.py +119 -0
- external/cv/mmcv/cnn/bricks/context_block.py +131 -0
- external/cv/mmcv/cnn/bricks/conv.py +56 -0
- external/cv/mmcv/cnn/bricks/conv2d_adaptive_padding.py +68 -0
- external/cv/mmcv/cnn/bricks/conv_module.py +343 -0
- external/cv/mmcv/cnn/bricks/conv_ws.py +158 -0
- external/cv/mmcv/cnn/bricks/depthwise_separable_conv_module.py +104 -0
- external/cv/mmcv/cnn/bricks/drop.py +72 -0
- external/cv/mmcv/cnn/bricks/generalized_attention.py +416 -0
- external/cv/mmcv/cnn/bricks/hsigmoid.py +55 -0
- external/cv/mmcv/cnn/bricks/hswish.py +44 -0
- external/cv/mmcv/cnn/bricks/non_local.py +313 -0
- external/cv/mmcv/cnn/bricks/norm.py +161 -0
- external/cv/mmcv/cnn/bricks/padding.py +48 -0
- external/cv/mmcv/cnn/bricks/plugin.py +106 -0
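In short, the commit bootstraps a self-contained Gradio demo: app.py drives person detection plus pose estimation, assets/ carries the RTMDet detector, two Sapiens TorchScript checkpoints, and example images (all via Git LFS), and external/ vendors the mm* dependencies, including an mmcv source tree and a prebuilt mmcv wheel (the 50-file listing is cut off inside mmcv's cnn/bricks modules). The per-file diffs follow.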
.gitattributes
ADDED
@@ -0,0 +1,38 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.pt2 filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
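Each rule above routes a matching file type through Git LFS (the filter/diff/merge drivers plus -text), which is how the commit can carry multi-gigabyte binaries: the *.png, *.pt2, and *.whl lines cover the sample images, the Sapiens TorchScript checkpoints, and the prebuilt mmcv wheel added below.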
.gitignore
ADDED
@@ -0,0 +1 @@
+__pycache__
NOTES.md
ADDED
@@ -0,0 +1,11 @@
+## Create wheel for mmcv
+```
+cd ./external/engine
+python setup.py bdist_wheel
+
+cd ./external/cv
+MMCV_WITH_OPS=1 python setup.py bdist_wheel
+
+cd ./external/det
+python setup.py bdist_wheel
+```
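build_wheel.py, added later in this commit, scripts these same three bdist_wheel builds, and app.py installs the resulting wheels from each package's dist/ directory at startup whenever mmengine, mmcv, or mmdet is missing.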
README.md
ADDED
@@ -0,0 +1,13 @@
+---
+title: Sapiens Pose
+emoji: 📊
+colorFrom: pink
+colorTo: yellow
+sdk: gradio
+sdk_version: 4.42.0
+app_file: app.py
+pinned: false
+license: cc-by-nc-4.0
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,453 @@
+import os
+from typing import List
+import spaces
+import gradio as gr
+import numpy as np
+import torch
+import json
+import tempfile
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+import cv2
+from gradio.themes.utils import sizes
+from classes_and_palettes import (
+    COCO_KPTS_COLORS,
+    COCO_WHOLEBODY_KPTS_COLORS,
+    GOLIATH_KPTS_COLORS,
+    GOLIATH_SKELETON_INFO,
+    GOLIATH_KEYPOINTS
+)
+
+import os
+import sys
+import subprocess
+import importlib.util
+
+def is_package_installed(package_name):
+    return importlib.util.find_spec(package_name) is not None
+
+def find_wheel(package_path):
+    dist_dir = os.path.join(package_path, "dist")
+    if os.path.exists(dist_dir):
+        wheel_files = [f for f in os.listdir(dist_dir) if f.endswith('.whl')]
+        if wheel_files:
+            return os.path.join(dist_dir, wheel_files[0])
+    return None
+
+def install_from_wheel(package_name, package_path):
+    wheel_file = find_wheel(package_path)
+    if wheel_file:
+        print(f"Installing {package_name} from wheel: {wheel_file}")
+        subprocess.check_call([sys.executable, "-m", "pip", "install", wheel_file])
+    else:
+        print(f"{package_name} wheel not found in {package_path}. Please build it first.")
+        sys.exit(1)
+
+def install_local_packages():
+    packages = [
+        ("mmengine", "./external/engine"),
+        ("mmcv", "./external/cv"),
+        ("mmdet", "./external/det")
+    ]
+
+    for package_name, package_path in packages:
+        if not is_package_installed(package_name):
+            print(f"Installing {package_name}...")
+            install_from_wheel(package_name, package_path)
+        else:
+            print(f"{package_name} is already installed.")
+
+# Run the installation at the start of your app
+install_local_packages()
+
+from detector_utils import (
+    adapt_mmdet_pipeline,
+    init_detector,
+    process_images_detector,
+)
+
+class Config:
+    ASSETS_DIR = os.path.join(os.path.dirname(__file__), 'assets')
+    CHECKPOINTS_DIR = os.path.join(ASSETS_DIR, "checkpoints")
+    CHECKPOINTS = {
+        "0.3b": "sapiens_0.3b_goliath_best_goliath_AP_575_torchscript.pt2",
+        "1b": "sapiens_1b_goliath_best_goliath_AP_640_torchscript.pt2",
+    }
+    DETECTION_CHECKPOINT = os.path.join(CHECKPOINTS_DIR, 'rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth')
+    DETECTION_CONFIG = os.path.join(ASSETS_DIR, 'rtmdet_m_640-8xb32_coco-person_no_nms.py')
+
+class ModelManager:
+    @staticmethod
+    def load_model(checkpoint_name: str):
+        if checkpoint_name is None:
+            return None
+        checkpoint_path = os.path.join(Config.CHECKPOINTS_DIR, checkpoint_name)
+        model = torch.jit.load(checkpoint_path)
+        model.eval()
+        model.to("cuda")
+        return model
+
+    @staticmethod
+    @torch.inference_mode()
+    def run_model(model, input_tensor):
+        return model(input_tensor)
+
+class ImageProcessor:
+    def __init__(self):
+        self.transform = transforms.Compose([
+            transforms.Resize((1024, 768)),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[123.5/255, 116.5/255, 103.5/255],
+                                 std=[58.5/255, 57.0/255, 57.5/255])
+        ])
+        self.detector = init_detector(
+            Config.DETECTION_CONFIG, Config.DETECTION_CHECKPOINT, device='cpu'
+        )
+        self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)
+
+    def detect_persons(self, image: Image.Image):
+        # Convert the PIL image to a batched numpy array for the detector
+        image = np.array(image)
+        image = np.expand_dims(image, axis=0)
+
+        # Perform person detection
+        bboxes_batch = process_images_detector(
+            image,
+            self.detector
+        )
+        bboxes = self.get_person_bboxes(bboxes_batch[0])  # Get bboxes for the first (and only) image
+
+        return bboxes
+
+    def get_person_bboxes(self, bboxes_batch, score_thr=0.3):
+        person_bboxes = []
+        for bbox in bboxes_batch:
+            if len(bbox) == 5:  # [x1, y1, x2, y2, score]
+                if bbox[4] > score_thr:
+                    person_bboxes.append(bbox)
+            elif len(bbox) == 4:  # [x1, y1, x2, y2]
+                person_bboxes.append(bbox + [1.0])  # Add a default score of 1.0
+        return person_bboxes
+
+    @spaces.GPU
+    @torch.inference_mode()
+    def estimate_pose(self, image: Image.Image, bboxes: List[List[float]], model_name: str, kpt_threshold: float):
+        pose_model = ModelManager.load_model(Config.CHECKPOINTS[model_name])
+
+        result_image = image.copy()
+        all_keypoints = []  # List to store keypoints for all persons
+
+        for bbox in bboxes:
+            cropped_img = self.crop_image(result_image, bbox)
+            input_tensor = self.transform(cropped_img).unsqueeze(0).to("cuda")
+            heatmaps = ModelManager.run_model(pose_model, input_tensor)
+            keypoints = self.heatmaps_to_keypoints(heatmaps[0].cpu().numpy())
+            all_keypoints.append(keypoints)  # Collect keypoints
+            result_image = self.draw_keypoints(result_image, keypoints, bbox, kpt_threshold)
+
+        return result_image, all_keypoints
+
+    def process_image(self, image: Image.Image, model_name: str, kpt_threshold: str):
+        bboxes = self.detect_persons(image)
+        result_image, keypoints = self.estimate_pose(image, bboxes, model_name, float(kpt_threshold))
+        return result_image, keypoints
+
+    def crop_image(self, image, bbox):
+        if len(bbox) == 4:
+            x1, y1, x2, y2 = map(int, bbox)
+        elif len(bbox) >= 5:
+            x1, y1, x2, y2, _ = map(int, bbox[:5])
+        else:
+            raise ValueError(f"Unexpected bbox format: {bbox}")
+
+        crop = image.crop((x1, y1, x2, y2))
+        return crop
+
+    @staticmethod
+    def heatmaps_to_keypoints(heatmaps):
+        num_joints = heatmaps.shape[0]  # Should be 308
+        keypoints = {}
+        for i, name in enumerate(GOLIATH_KEYPOINTS):
+            if i < num_joints:
+                heatmap = heatmaps[i]
+                y, x = np.unravel_index(np.argmax(heatmap), heatmap.shape)
+                conf = heatmap[y, x]
+                keypoints[name] = (float(x), float(y), float(conf))
+        return keypoints
+
+    @staticmethod
+    def draw_keypoints(image, keypoints, bbox, kpt_threshold):
+        image = np.array(image)
+
+        # Handle both 4 and 5-element bounding boxes
+        if len(bbox) == 4:
+            x1, y1, x2, y2 = map(int, bbox)
+        elif len(bbox) >= 5:
+            x1, y1, x2, y2, _ = map(int, bbox[:5])
+        else:
+            raise ValueError(f"Unexpected bbox format: {bbox}")
+
+        # Calculate adaptive radius and thickness based on bounding box size
+        bbox_width = x2 - x1
+        bbox_height = y2 - y1
+        bbox_size = np.sqrt(bbox_width * bbox_height)
+
+        radius = max(1, int(bbox_size * 0.006))  # minimum 1 pixel
+        thickness = max(1, int(bbox_size * 0.006))  # minimum 1 pixel
+        bbox_thickness = max(1, thickness//4)
+
+        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), bbox_thickness)
+
+        # Draw keypoints
+        for i, (name, (x, y, conf)) in enumerate(keypoints.items()):
+            if conf > kpt_threshold and i < len(GOLIATH_KPTS_COLORS):
+                x_coord = int(x * bbox_width / 192) + x1
+                y_coord = int(y * bbox_height / 256) + y1
+                color = GOLIATH_KPTS_COLORS[i]
+                cv2.circle(image, (x_coord, y_coord), radius, color, -1)
+
+        # Draw skeleton
+        for _, link_info in GOLIATH_SKELETON_INFO.items():
+            pt1_name, pt2_name = link_info['link']
+            color = link_info['color']
+
+            if pt1_name in keypoints and pt2_name in keypoints:
+                pt1 = keypoints[pt1_name]
+                pt2 = keypoints[pt2_name]
+                if pt1[2] > kpt_threshold and pt2[2] > kpt_threshold:
+                    x1_coord = int(pt1[0] * bbox_width / 192) + x1
+                    y1_coord = int(pt1[1] * bbox_height / 256) + y1
+                    x2_coord = int(pt2[0] * bbox_width / 192) + x1
+                    y2_coord = int(pt2[1] * bbox_height / 256) + y1
+                    cv2.line(image, (x1_coord, y1_coord), (x2_coord, y2_coord), color, thickness=thickness)
+
+        return Image.fromarray(image)
+
+class GradioInterface:
+    def __init__(self):
+        self.image_processor = ImageProcessor()
+
+    def create_interface(self):
+        app_styles = """
+        <style>
+            /* Global Styles */
+            body, #root {
+                font-family: Helvetica, Arial, sans-serif;
+                background-color: #1a1a1a;
+                color: #fafafa;
+            }
+            /* Header Styles */
+            .app-header {
+                background: linear-gradient(45deg, #1a1a1a 0%, #333333 100%);
+                padding: 24px;
+                border-radius: 8px;
+                margin-bottom: 24px;
+                text-align: center;
+            }
+            .app-title {
+                font-size: 48px;
+                margin: 0;
+                color: #fafafa;
+            }
+            .app-subtitle {
+                font-size: 24px;
+                margin: 8px 0 16px;
+                color: #fafafa;
+            }
+            .app-description {
+                font-size: 16px;
+                line-height: 1.6;
+                opacity: 0.8;
+                margin-bottom: 24px;
+            }
+            /* Button Styles */
+            .publication-links {
+                display: flex;
+                justify-content: center;
+                flex-wrap: wrap;
+                gap: 8px;
+                margin-bottom: 16px;
+            }
+            .publication-link {
+                display: inline-flex;
+                align-items: center;
+                padding: 8px 16px;
+                background-color: #333;
+                color: #fff !important;
+                text-decoration: none !important;
+                border-radius: 20px;
+                font-size: 14px;
+                transition: background-color 0.3s;
+            }
+            .publication-link:hover {
+                background-color: #555;
+            }
+            .publication-link i {
+                margin-right: 8px;
+            }
+            /* Content Styles */
+            .content-container {
+                background-color: #2a2a2a;
+                border-radius: 8px;
+                padding: 24px;
+                margin-bottom: 24px;
+            }
+            /* Image Styles */
+            .image-preview img {
+                max-width: 512px;
+                max-height: 512px;
+                margin: 0 auto;
+                border-radius: 4px;
+                display: block;
+                object-fit: contain;
+            }
+            /* Control Styles */
+            .control-panel {
+                background-color: #333;
+                padding: 16px;
+                border-radius: 8px;
+                margin-top: 16px;
+            }
+            /* Gradio Component Overrides */
+            .gr-button {
+                background-color: #4a4a4a;
+                color: #fff;
+                border: none;
+                border-radius: 4px;
+                padding: 8px 16px;
+                cursor: pointer;
+                transition: background-color 0.3s;
+            }
+            .gr-button:hover {
+                background-color: #5a5a5a;
+            }
+            .gr-input, .gr-dropdown {
+                background-color: #3a3a3a;
+                color: #fff;
+                border: 1px solid #4a4a4a;
+                border-radius: 4px;
+                padding: 8px;
+            }
+            .gr-form {
+                background-color: transparent;
+            }
+            .gr-panel {
+                border: none;
+                background-color: transparent;
+            }
+            /* Override any conflicting styles from Bulma */
+            .button.is-normal.is-rounded.is-dark {
+                color: #fff !important;
+                text-decoration: none !important;
+            }
+        </style>
+        """
+
+        header_html = f"""
+        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/css/bulma.min.css">
+        <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
+        {app_styles}
+        <div class="app-header">
+            <h1 class="app-title">Sapiens: Pose Estimation</h1>
+            <h2 class="app-subtitle">ECCV 2024 (Oral)</h2>
+            <p class="app-description">
+                Meta presents Sapiens, foundation models for human tasks pretrained on 300 million human images.
+                This demo showcases the finetuned pose estimation model. <br>
+            </p>
+            <div class="publication-links">
+                <a href="https://arxiv.org/abs/2408.12569" class="publication-link">
+                    <i class="fas fa-file-pdf"></i>arXiv
+                </a>
+                <a href="https://github.com/facebookresearch/sapiens" class="publication-link">
+                    <i class="fab fa-github"></i>Code
+                </a>
+                <a href="https://about.meta.com/realitylabs/codecavatars/sapiens/" class="publication-link">
+                    <i class="fas fa-globe"></i>Meta
+                </a>
+                <a href="https://rawalkhirodkar.github.io/sapiens" class="publication-link">
+                    <i class="fas fa-chart-bar"></i>Results
+                </a>
+            </div>
+            <div class="publication-links">
+                <a href="https://huggingface.co/spaces/facebook/sapiens_pose" class="publication-link">
+                    <i class="fas fa-user"></i>Demo-Pose
+                </a>
+                <a href="https://huggingface.co/spaces/facebook/sapiens_seg" class="publication-link">
+                    <i class="fas fa-puzzle-piece"></i>Demo-Seg
+                </a>
+                <a href="https://huggingface.co/spaces/facebook/sapiens_depth" class="publication-link">
+                    <i class="fas fa-cube"></i>Demo-Depth
+                </a>
+                <a href="https://huggingface.co/spaces/facebook/sapiens_normal" class="publication-link">
+                    <i class="fas fa-vector-square"></i>Demo-Normal
+                </a>
+            </div>
+        </div>
+        """
+
+        js_func = """
+        function refresh() {
+            const url = new URL(window.location);
+            if (url.searchParams.get('__theme') !== 'dark') {
+                url.searchParams.set('__theme', 'dark');
+                window.location.href = url.href;
+            }
+        }
+        """
+
+        def process_image(image, model_name, kpt_threshold):
+            result_image, keypoints = self.image_processor.process_image(image, model_name, kpt_threshold)
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode='w') as json_file:
+                json.dump(keypoints, json_file)
+                json_file_path = json_file.name
+            return result_image, json_file_path
+
+        with gr.Blocks(js=js_func, theme=gr.themes.Default()) as demo:
+            gr.HTML(header_html)
+            with gr.Row(elem_classes="content-container"):
+                with gr.Column():
+                    input_image = gr.Image(label="Input Image", type="pil", format="png", elem_classes="image-preview")
+                    with gr.Row():
+                        model_name = gr.Dropdown(
+                            label="Model Size",
+                            choices=list(Config.CHECKPOINTS.keys()),
+                            value="1b",
+                        )
+                        kpt_threshold = gr.Dropdown(
+                            label="Min Keypoint Confidence",
+                            choices=["0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9"],
+                            value="0.3",
+                        )
+                    example_model = gr.Examples(
+                        inputs=input_image,
+                        examples_per_page=14,
+                        examples=[
+                            os.path.join(Config.ASSETS_DIR, "images", img)
+                            for img in os.listdir(os.path.join(Config.ASSETS_DIR, "images"))
+                        ],
+                    )
+                with gr.Column():
+                    result_image = gr.Image(label="Pose-308 Result", type="pil", elem_classes="image-preview")
+                    json_output = gr.File(label="Pose-308 Output (.json)")
+                    run_button = gr.Button("Run")
+
+            run_button.click(
+                fn=process_image,
+                inputs=[input_image, model_name, kpt_threshold],
+                outputs=[result_image, json_output],
+            )
+
+        return demo
+
+def main():
+    if torch.cuda.is_available():
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.allow_tf32 = True
+
+    interface = GradioInterface()
+    demo = interface.create_interface()
+    demo.launch(share=False)
+
+if __name__ == "__main__":
+    main()
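For readers tracing the pose path above: each detected person is cropped, resized to 1024×768, and pushed through the TorchScript model, then heatmaps_to_keypoints takes a per-channel argmax whose coordinates draw_keypoints rescales from the heatmap grid back into the bounding box (the /192 and /256 factors). A minimal self-contained sketch of that decode step on dummy data — the 308×256×192 heatmap shape is an assumption inferred from those scale factors, not a value read from the checkpoint:

```python
import numpy as np

# Dummy stand-in for the model output: (num_joints, H, W).
# 308 channels on a 256x192 grid is assumed from the /256 and /192
# scale factors used in draw_keypoints; the real shape comes from the model.
heatmaps = np.random.rand(308, 256, 192).astype(np.float32)

def decode(heatmaps, bbox):
    """Argmax-decode each heatmap channel and map it into the person bbox."""
    x1, y1, x2, y2 = bbox
    w, h = x2 - x1, y2 - y1
    keypoints = []
    for hm in heatmaps:
        y, x = np.unravel_index(np.argmax(hm), hm.shape)  # peak location
        keypoints.append((x1 + x * w / hm.shape[1],       # back to image coords
                          y1 + y * h / hm.shape[0],
                          float(hm[y, x])))               # peak value as confidence
    return keypoints

kpts = decode(heatmaps, bbox=(100, 50, 292, 306))
print(len(kpts), kpts[0])
```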
assets/checkpoints/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b66b27072c6a3cd4f093882df440921987076131fb78a7df7b1cf92d67f41509
+size 99149914
assets/checkpoints/sapiens_0.3b_goliath_best_goliath_AP_575_torchscript.pt2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21cf7e3e723720d847bee6d3b321bfcdb33268c9f1418d7552552264ae0a5a9b
+size 1319579523
assets/checkpoints/sapiens_1b_goliath_best_goliath_AP_640_torchscript.pt2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6218c6be17697157f9e65ee34054a94ab8ca0f637380fa5748c18e04814976e
+size 4677162331
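All three checkpoints are committed as Git LFS pointer files rather than raw binaries: the oid is the SHA-256 of the real blob and size is its byte count, so the RTMDet detector is roughly 99 MB while the 0.3b and 1b Sapiens TorchScript models weigh about 1.3 GB and 4.7 GB.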
assets/images/68204.png
ADDED
(Git LFS image)
assets/images/68210.png
ADDED
(Git LFS image)
assets/images/68658.png
ADDED
(Git LFS image)
assets/images/68666.png
ADDED
(Git LFS image)
assets/images/68691.png
ADDED
(Git LFS image)
assets/images/68956.png
ADDED
(Git LFS image)
assets/images/pexels-amresh444-17315601.png
ADDED
(Git LFS image)
assets/images/pexels-gabby-k-6311686.png
ADDED
(Git LFS image)
assets/images/pexels-julia-m-cameron-4145040.png
ADDED
(Git LFS image)
assets/images/pexels-marcus-aurelius-6787357.png
ADDED
(Git LFS image)
assets/images/pexels-mo-saeed-3616599-5409085.png
ADDED
(Git LFS image)
assets/images/pexels-riedelmax-27355495.png
ADDED
(Git LFS image)
assets/images/pexels-sergeymakashin-5368660.png
ADDED
(Git LFS image)
assets/images/pexels-vinicius-wiesehofer-289347-4219918.png
ADDED
(Git LFS image)
assets/rtmdet_m_640-8xb32_coco-person_no_nms.py
ADDED
@@ -0,0 +1,20 @@
+_base_ = 'mmdet::rtmdet/rtmdet_m_8xb32-300e_coco.py'
+
+checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth'  # noqa
+
+model = dict(
+    backbone=dict(
+        init_cfg=dict(
+            type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
+    bbox_head=dict(num_classes=1),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=None,
+        max_per_img=100))
+
+train_dataloader = dict(dataset=dict(metainfo=dict(classes=('person', ))))
+
+val_dataloader = dict(dataset=dict(metainfo=dict(classes=('person', ))))
+test_dataloader = val_dataloader
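This config specializes mmdet's stock RTMDet-M recipe to a single person class and sets nms=None at test time, leaving score filtering to the caller (app.py thresholds at 0.3). As a rough smoke-test sketch using stock mmdet 3.x APIs — the Space itself installs its patched wheels from external/ and goes through detector_utils instead, so treat the environment here as an assumption:

```python
from mmdet.apis import init_detector, inference_detector

# Paths as laid out by this commit (repo root as the working directory).
config = 'assets/rtmdet_m_640-8xb32_coco-person_no_nms.py'
weights = 'assets/checkpoints/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth'

model = init_detector(config, weights, device='cpu')           # build + load weights
result = inference_detector(model, 'assets/images/68204.png')  # run on one image

# mmdet 3.x returns a DetDataSample; with nms=None the head emits many
# overlapping raw boxes, so filter by score before using them.
inst = result.pred_instances
keep = inst.scores > 0.3
print(inst.bboxes[keep].shape, inst.scores[keep])
```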
build_wheel.py
ADDED
@@ -0,0 +1,26 @@
+import os
+import subprocess
+import sys
+
+def build_wheel(package_path):
+    current_dir = os.getcwd()
+    os.chdir(package_path)
+    try:
+        subprocess.check_call([sys.executable, "setup.py", "bdist_wheel"])
+    finally:
+        os.chdir(current_dir)
+
+def main():
+    packages = [
+        "./external/engine",
+        "./external/cv",
+        "./external/det"
+    ]
+
+    for package in packages:
+        print(f"Building wheel for {package}...")
+        build_wheel(package)
+        print(f"Wheel built for {package}")
+
+if __name__ == "__main__":
+    main()
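Run it from the repo root (the ./external paths are relative) as `python build_wheel.py`. Note that, unlike NOTES.md, this script does not set MMCV_WITH_OPS=1 for the mmcv build, so export that variable first if the compiled ops are needed.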
classes_and_palettes.py
ADDED
@@ -0,0 +1,1024 @@
+COCO_KPTS_COLORS = [
+    [51, 153, 255],  # 0: nose
+    [51, 153, 255],  # 1: left_eye
+    [51, 153, 255],  # 2: right_eye
+    [51, 153, 255],  # 3: left_ear
+    [51, 153, 255],  # 4: right_ear
+    [0, 255, 0],  # 5: left_shoulder
+    [255, 128, 0],  # 6: right_shoulder
+    [0, 255, 0],  # 7: left_elbow
+    [255, 128, 0],  # 8: right_elbow
+    [0, 255, 0],  # 9: left_wrist
+    [255, 128, 0],  # 10: right_wrist
+    [0, 255, 0],  # 11: left_hip
+    [255, 128, 0],  # 12: right_hip
+    [0, 255, 0],  # 13: left_knee
+    [255, 128, 0],  # 14: right_knee
+    [0, 255, 0],  # 15: left_ankle
+    [255, 128, 0],  # 16: right_ankle
+]
+
+COCO_WHOLEBODY_KPTS_COLORS = [
+    [51, 153, 255],  # 0: nose
+    [51, 153, 255],  # 1: left_eye
+    [51, 153, 255],  # 2: right_eye
+    [51, 153, 255],  # 3: left_ear
+    [51, 153, 255],  # 4: right_ear
+    [0, 255, 0],  # 5: left_shoulder
+    [255, 128, 0],  # 6: right_shoulder
+    [0, 255, 0],  # 7: left_elbow
+    [255, 128, 0],  # 8: right_elbow
+    [0, 255, 0],  # 9: left_wrist
+    [255, 128, 0],  # 10: right_wrist
+    [0, 255, 0],  # 11: left_hip
+    [255, 128, 0],  # 12: right_hip
+    [0, 255, 0],  # 13: left_knee
+    [255, 128, 0],  # 14: right_knee
+    [0, 255, 0],  # 15: left_ankle
+    [255, 128, 0],  # 16: right_ankle
+    [255, 128, 0],  # 17: left_big_toe
+    [255, 128, 0],  # 18: left_small_toe
+    [255, 128, 0],  # 19: left_heel
+    [255, 128, 0],  # 20: right_big_toe
+    [255, 128, 0],  # 21: right_small_toe
+    [255, 128, 0],  # 22: right_heel
+    [255, 255, 255],  # 23: face-0
+    [255, 255, 255],  # 24: face-1
+    [255, 255, 255],  # 25: face-2
+    [255, 255, 255],  # 26: face-3
+    [255, 255, 255],  # 27: face-4
+    [255, 255, 255],  # 28: face-5
+    [255, 255, 255],  # 29: face-6
+    [255, 255, 255],  # 30: face-7
+    [255, 255, 255],  # 31: face-8
+    [255, 255, 255],  # 32: face-9
+    [255, 255, 255],  # 33: face-10
+    [255, 255, 255],  # 34: face-11
+    [255, 255, 255],  # 35: face-12
+    [255, 255, 255],  # 36: face-13
+    [255, 255, 255],  # 37: face-14
+    [255, 255, 255],  # 38: face-15
+    [255, 255, 255],  # 39: face-16
+    [255, 255, 255],  # 40: face-17
+    [255, 255, 255],  # 41: face-18
+    [255, 255, 255],  # 42: face-19
+    [255, 255, 255],  # 43: face-20
+    [255, 255, 255],  # 44: face-21
+    [255, 255, 255],  # 45: face-22
+    [255, 255, 255],  # 46: face-23
+    [255, 255, 255],  # 47: face-24
+    [255, 255, 255],  # 48: face-25
+    [255, 255, 255],  # 49: face-26
+    [255, 255, 255],  # 50: face-27
+    [255, 255, 255],  # 51: face-28
+    [255, 255, 255],  # 52: face-29
+    [255, 255, 255],  # 53: face-30
+    [255, 255, 255],  # 54: face-31
+    [255, 255, 255],  # 55: face-32
+    [255, 255, 255],  # 56: face-33
+    [255, 255, 255],  # 57: face-34
+    [255, 255, 255],  # 58: face-35
+    [255, 255, 255],  # 59: face-36
+    [255, 255, 255],  # 60: face-37
+    [255, 255, 255],  # 61: face-38
+    [255, 255, 255],  # 62: face-39
+    [255, 255, 255],  # 63: face-40
+    [255, 255, 255],  # 64: face-41
+    [255, 255, 255],  # 65: face-42
+    [255, 255, 255],  # 66: face-43
+    [255, 255, 255],  # 67: face-44
+    [255, 255, 255],  # 68: face-45
+    [255, 255, 255],  # 69: face-46
+    [255, 255, 255],  # 70: face-47
+    [255, 255, 255],  # 71: face-48
+    [255, 255, 255],  # 72: face-49
+    [255, 255, 255],  # 73: face-50
+    [255, 255, 255],  # 74: face-51
+    [255, 255, 255],  # 75: face-52
+    [255, 255, 255],  # 76: face-53
+    [255, 255, 255],  # 77: face-54
+    [255, 255, 255],  # 78: face-55
+    [255, 255, 255],  # 79: face-56
+    [255, 255, 255],  # 80: face-57
+    [255, 255, 255],  # 81: face-58
+    [255, 255, 255],  # 82: face-59
+    [255, 255, 255],  # 83: face-60
+    [255, 255, 255],  # 84: face-61
+    [255, 255, 255],  # 85: face-62
+    [255, 255, 255],  # 86: face-63
+    [255, 255, 255],  # 87: face-64
+    [255, 255, 255],  # 88: face-65
+    [255, 255, 255],  # 89: face-66
+    [255, 255, 255],  # 90: face-67
+    [255, 255, 255],  # 91: left_hand_root
+    [255, 128, 0],  # 92: left_thumb1
+    [255, 128, 0],  # 93: left_thumb2
+    [255, 128, 0],  # 94: left_thumb3
+    [255, 128, 0],  # 95: left_thumb4
+    [255, 153, 255],  # 96: left_forefinger1
+    [255, 153, 255],  # 97: left_forefinger2
+    [255, 153, 255],  # 98: left_forefinger3
+    [255, 153, 255],  # 99: left_forefinger4
+    [102, 178, 255],  # 100: left_middle_finger1
+    [102, 178, 255],  # 101: left_middle_finger2
+    [102, 178, 255],  # 102: left_middle_finger3
+    [102, 178, 255],  # 103: left_middle_finger4
+    [255, 51, 51],  # 104: left_ring_finger1
+    [255, 51, 51],  # 105: left_ring_finger2
+    [255, 51, 51],  # 106: left_ring_finger3
+    [255, 51, 51],  # 107: left_ring_finger4
+    [0, 255, 0],  # 108: left_pinky_finger1
+    [0, 255, 0],  # 109: left_pinky_finger2
+    [0, 255, 0],  # 110: left_pinky_finger3
+    [0, 255, 0],  # 111: left_pinky_finger4
+    [255, 255, 255],  # 112: right_hand_root
+    [255, 128, 0],  # 113: right_thumb1
+    [255, 128, 0],  # 114: right_thumb2
+    [255, 128, 0],  # 115: right_thumb3
+    [255, 128, 0],  # 116: right_thumb4
+    [255, 153, 255],  # 117: right_forefinger1
+    [255, 153, 255],  # 118: right_forefinger2
+    [255, 153, 255],  # 119: right_forefinger3
+    [255, 153, 255],  # 120: right_forefinger4
+    [102, 178, 255],  # 121: right_middle_finger1
+    [102, 178, 255],  # 122: right_middle_finger2
+    [102, 178, 255],  # 123: right_middle_finger3
+    [102, 178, 255],  # 124: right_middle_finger4
+    [255, 51, 51],  # 125: right_ring_finger1
+    [255, 51, 51],  # 126: right_ring_finger2
+    [255, 51, 51],  # 127: right_ring_finger3
+    [255, 51, 51],  # 128: right_ring_finger4
+    [0, 255, 0],  # 129: right_pinky_finger1
+    [0, 255, 0],  # 130: right_pinky_finger2
+    [0, 255, 0],  # 131: right_pinky_finger3
+    [0, 255, 0],  # 132: right_pinky_finger4
+]
+
+
+GOLIATH_KPTS_COLORS = [
+    [51, 153, 255],  # 0: nose
+    [51, 153, 255],  # 1: left_eye
+    [51, 153, 255],  # 2: right_eye
+    [51, 153, 255],  # 3: left_ear
+    [51, 153, 255],  # 4: right_ear
+    [51, 153, 255],  # 5: left_shoulder
+    [51, 153, 255],  # 6: right_shoulder
+    [51, 153, 255],  # 7: left_elbow
+    [51, 153, 255],  # 8: right_elbow
+    [51, 153, 255],  # 9: left_hip
+    [51, 153, 255],  # 10: right_hip
+    [51, 153, 255],  # 11: left_knee
+    [51, 153, 255],  # 12: right_knee
+    [51, 153, 255],  # 13: left_ankle
+    [51, 153, 255],  # 14: right_ankle
+    [51, 153, 255],  # 15: left_big_toe
+    [51, 153, 255],  # 16: left_small_toe
+    [51, 153, 255],  # 17: left_heel
+    [51, 153, 255],  # 18: right_big_toe
+    [51, 153, 255],  # 19: right_small_toe
+    [51, 153, 255],  # 20: right_heel
+    [51, 153, 255],  # 21: right_thumb4
+    [51, 153, 255],  # 22: right_thumb3
+    [51, 153, 255],  # 23: right_thumb2
+    [51, 153, 255],  # 24: right_thumb_third_joint
+    [51, 153, 255],  # 25: right_forefinger4
+    [51, 153, 255],  # 26: right_forefinger3
+    [51, 153, 255],  # 27: right_forefinger2
+    [51, 153, 255],  # 28: right_forefinger_third_joint
+    [51, 153, 255],  # 29: right_middle_finger4
+    [51, 153, 255],  # 30: right_middle_finger3
+    [51, 153, 255],  # 31: right_middle_finger2
+    [51, 153, 255],  # 32: right_middle_finger_third_joint
+    [51, 153, 255],  # 33: right_ring_finger4
+    [51, 153, 255],  # 34: right_ring_finger3
+    [51, 153, 255],  # 35: right_ring_finger2
+    [51, 153, 255],  # 36: right_ring_finger_third_joint
+    [51, 153, 255],  # 37: right_pinky_finger4
+    [51, 153, 255],  # 38: right_pinky_finger3
+    [51, 153, 255],  # 39: right_pinky_finger2
+    [51, 153, 255],  # 40: right_pinky_finger_third_joint
+    [51, 153, 255],  # 41: right_wrist
+    [51, 153, 255],  # 42: left_thumb4
+    [51, 153, 255],  # 43: left_thumb3
+    [51, 153, 255],  # 44: left_thumb2
+    [51, 153, 255],  # 45: left_thumb_third_joint
+    [51, 153, 255],  # 46: left_forefinger4
+    [51, 153, 255],  # 47: left_forefinger3
+    [51, 153, 255],  # 48: left_forefinger2
+    [51, 153, 255],  # 49: left_forefinger_third_joint
+    [51, 153, 255],  # 50: left_middle_finger4
+    [51, 153, 255],  # 51: left_middle_finger3
+    [51, 153, 255],  # 52: left_middle_finger2
+    [51, 153, 255],  # 53: left_middle_finger_third_joint
+    [51, 153, 255],  # 54: left_ring_finger4
+    [51, 153, 255],  # 55: left_ring_finger3
+    [51, 153, 255],  # 56: left_ring_finger2
+    [51, 153, 255],  # 57: left_ring_finger_third_joint
+    [51, 153, 255],  # 58: left_pinky_finger4
+    [51, 153, 255],  # 59: left_pinky_finger3
+    [51, 153, 255],  # 60: left_pinky_finger2
+    [51, 153, 255],  # 61: left_pinky_finger_third_joint
+    [51, 153, 255],  # 62: left_wrist
+    [51, 153, 255],  # 63: left_olecranon
+    [51, 153, 255],  # 64: right_olecranon
+    [51, 153, 255],  # 65: left_cubital_fossa
+    [51, 153, 255],  # 66: right_cubital_fossa
+    [51, 153, 255],  # 67: left_acromion
+    [51, 153, 255],  # 68: right_acromion
+    [51, 153, 255],  # 69: neck
+    [255, 255, 255],  # 70: center_of_glabella
+    [255, 255, 255],  # 71: center_of_nose_root
+    [255, 255, 255],  # 72: tip_of_nose_bridge
+    [255, 255, 255],  # 73: midpoint_1_of_nose_bridge
+    [255, 255, 255],  # 74: midpoint_2_of_nose_bridge
+    [255, 255, 255],  # 75: midpoint_3_of_nose_bridge
+    [255, 255, 255],  # 76: center_of_labiomental_groove
+    [255, 255, 255],  # 77: tip_of_chin
+    [255, 255, 255],  # 78: upper_startpoint_of_r_eyebrow
+    [255, 255, 255],  # 79: lower_startpoint_of_r_eyebrow
+    [255, 255, 255],  # 80: end_of_r_eyebrow
+    [255, 255, 255],  # 81: upper_midpoint_1_of_r_eyebrow
+    [255, 255, 255],  # 82: lower_midpoint_1_of_r_eyebrow
+    [255, 255, 255],  # 83: upper_midpoint_2_of_r_eyebrow
+    [255, 255, 255],  # 84: upper_midpoint_3_of_r_eyebrow
+    [255, 255, 255],  # 85: lower_midpoint_2_of_r_eyebrow
+    [255, 255, 255],  # 86: lower_midpoint_3_of_r_eyebrow
+    [255, 255, 255],  # 87: upper_startpoint_of_l_eyebrow
+    [255, 255, 255],  # 88: lower_startpoint_of_l_eyebrow
+    [255, 255, 255],  # 89: end_of_l_eyebrow
+    [255, 255, 255],  # 90: upper_midpoint_1_of_l_eyebrow
+    [255, 255, 255],  # 91: lower_midpoint_1_of_l_eyebrow
+    [255, 255, 255],  # 92: upper_midpoint_2_of_l_eyebrow
+    [255, 255, 255],  # 93: upper_midpoint_3_of_l_eyebrow
+    [255, 255, 255],  # 94: lower_midpoint_2_of_l_eyebrow
+    [255, 255, 255],  # 95: lower_midpoint_3_of_l_eyebrow
+    [192, 64, 128],  # 96: l_inner_end_of_upper_lash_line
+    [192, 64, 128],  # 97: l_outer_end_of_upper_lash_line
+    [192, 64, 128],  # 98: l_centerpoint_of_upper_lash_line
+    [192, 64, 128],  # 99: l_midpoint_2_of_upper_lash_line
+    [192, 64, 128],  # 100: l_midpoint_1_of_upper_lash_line
+    [192, 64, 128],  # 101: l_midpoint_6_of_upper_lash_line
+    [192, 64, 128],  # 102: l_midpoint_5_of_upper_lash_line
+    [192, 64, 128],  # 103: l_midpoint_4_of_upper_lash_line
+    [192, 64, 128],  # 104: l_midpoint_3_of_upper_lash_line
+    [192, 64, 128],  # 105: l_outer_end_of_upper_eyelid_line
+    [192, 64, 128],  # 106: l_midpoint_6_of_upper_eyelid_line
+    [192, 64, 128],  # 107: l_midpoint_2_of_upper_eyelid_line
+    [192, 64, 128],  # 108: l_midpoint_5_of_upper_eyelid_line
+    [192, 64, 128],  # 109: l_centerpoint_of_upper_eyelid_line
+    [192, 64, 128],  # 110: l_midpoint_4_of_upper_eyelid_line
+    [192, 64, 128],  # 111: l_midpoint_1_of_upper_eyelid_line
+    [192, 64, 128],  # 112: l_midpoint_3_of_upper_eyelid_line
+    [192, 64, 128],  # 113: l_midpoint_6_of_upper_crease_line
+    [192, 64, 128],  # 114: l_midpoint_2_of_upper_crease_line
+    [192, 64, 128],  # 115: l_midpoint_5_of_upper_crease_line
+    [192, 64, 128],  # 116: l_centerpoint_of_upper_crease_line
+    [192, 64, 128],  # 117: l_midpoint_4_of_upper_crease_line
+    [192, 64, 128],  # 118: l_midpoint_1_of_upper_crease_line
+    [192, 64, 128],  # 119: l_midpoint_3_of_upper_crease_line
+    [64, 32, 192],  # 120: r_inner_end_of_upper_lash_line
+    [64, 32, 192],  # 121: r_outer_end_of_upper_lash_line
+    [64, 32, 192],  # 122: r_centerpoint_of_upper_lash_line
+    [64, 32, 192],  # 123: r_midpoint_1_of_upper_lash_line
+    [64, 32, 192],  # 124: r_midpoint_2_of_upper_lash_line
+    [64, 32, 192],  # 125: r_midpoint_3_of_upper_lash_line
+    [64, 32, 192],  # 126: r_midpoint_4_of_upper_lash_line
+    [64, 32, 192],  # 127: r_midpoint_5_of_upper_lash_line
+    [64, 32, 192],  # 128: r_midpoint_6_of_upper_lash_line
+    [64, 32, 192],  # 129: r_outer_end_of_upper_eyelid_line
+    [64, 32, 192],  # 130: r_midpoint_3_of_upper_eyelid_line
+    [64, 32, 192],  # 131: r_midpoint_1_of_upper_eyelid_line
+    [64, 32, 192],  # 132: r_midpoint_4_of_upper_eyelid_line
+    [64, 32, 192],  # 133: r_centerpoint_of_upper_eyelid_line
+    [64, 32, 192],  # 134: r_midpoint_5_of_upper_eyelid_line
+    [64, 32, 192],  # 135: r_midpoint_2_of_upper_eyelid_line
+    [64, 32, 192],  # 136: r_midpoint_6_of_upper_eyelid_line
+    [64, 32, 192],  # 137: r_midpoint_3_of_upper_crease_line
+    [64, 32, 192],  # 138: r_midpoint_1_of_upper_crease_line
+    [64, 32, 192],  # 139: r_midpoint_4_of_upper_crease_line
+    [64, 32, 192],  # 140: r_centerpoint_of_upper_crease_line
+    [64, 32, 192],  # 141: r_midpoint_5_of_upper_crease_line
+    [64, 32, 192],  # 142: r_midpoint_2_of_upper_crease_line
+    [64, 32, 192],  # 143: r_midpoint_6_of_upper_crease_line
+    [64, 192, 128],  # 144: l_inner_end_of_lower_lash_line
+    [64, 192, 128],  # 145: l_outer_end_of_lower_lash_line
+    [64, 192, 128],  # 146: l_centerpoint_of_lower_lash_line
+    [64, 192, 128],  # 147: l_midpoint_2_of_lower_lash_line
+    [64, 192, 128],  # 148: l_midpoint_1_of_lower_lash_line
+    [64, 192, 128],  # 149: l_midpoint_6_of_lower_lash_line
+    [64, 192, 128],  # 150: l_midpoint_5_of_lower_lash_line
+    [64, 192, 128],  # 151: l_midpoint_4_of_lower_lash_line
+    [64, 192, 128],  # 152: l_midpoint_3_of_lower_lash_line
+    [64, 192, 128],  # 153: l_outer_end_of_lower_eyelid_line
+    [64, 192, 128],  # 154: l_midpoint_6_of_lower_eyelid_line
+    [64, 192, 128],  # 155: l_midpoint_2_of_lower_eyelid_line
+    [64, 192, 128],  # 156: l_midpoint_5_of_lower_eyelid_line
+    [64, 192, 128],  # 157: l_centerpoint_of_lower_eyelid_line
+    [64, 192, 128],  # 158: l_midpoint_4_of_lower_eyelid_line
+    [64, 192, 128],  # 159: l_midpoint_1_of_lower_eyelid_line
+    [64, 192, 128],  # 160: l_midpoint_3_of_lower_eyelid_line
+    [64, 192, 32],  # 161: r_inner_end_of_lower_lash_line
+    [64, 192, 32],  # 162: r_outer_end_of_lower_lash_line
+    [64, 192, 32],  # 163: r_centerpoint_of_lower_lash_line
+    [64, 192, 32],  # 164: r_midpoint_1_of_lower_lash_line
+    [64, 192, 32],  # 165: r_midpoint_2_of_lower_lash_line
+    [64, 192, 32],  # 166: r_midpoint_3_of_lower_lash_line
+    [64, 192, 32],  # 167: r_midpoint_4_of_lower_lash_line
+    [64, 192, 32],  # 168: r_midpoint_5_of_lower_lash_line
+    [64, 192, 32],  # 169: r_midpoint_6_of_lower_lash_line
+    [64, 192, 32],  # 170: r_outer_end_of_lower_eyelid_line
+    [64, 192, 32],  # 171: r_midpoint_3_of_lower_eyelid_line
+    [64, 192, 32],  # 172: r_midpoint_1_of_lower_eyelid_line
+    [64, 192, 32],  # 173: r_midpoint_4_of_lower_eyelid_line
+    [64, 192, 32],  # 174: r_centerpoint_of_lower_eyelid_line
+    [64, 192, 32],  # 175: r_midpoint_5_of_lower_eyelid_line
+    [64, 192, 32],  # 176: r_midpoint_2_of_lower_eyelid_line
+    [64, 192, 32],  # 177: r_midpoint_6_of_lower_eyelid_line
+    [0, 192, 0],  # 178: tip_of_nose
+    [0, 192, 0],  # 179: bottom_center_of_nose
+    [0, 192, 0],  # 180: r_outer_corner_of_nose
+    [0, 192, 0],  # 181: l_outer_corner_of_nose
+    [0, 192, 0],  # 182: inner_corner_of_r_nostril
+    [0, 192, 0],  # 183: outer_corner_of_r_nostril
+    [0, 192, 0],  # 184: upper_corner_of_r_nostril
+    [0, 192, 0],  # 185: inner_corner_of_l_nostril
+    [0, 192, 0],  # 186: outer_corner_of_l_nostril
+    [0, 192, 0],  # 187: upper_corner_of_l_nostril
+    [192, 0, 0],  # 188: r_outer_corner_of_mouth
+    [192, 0, 0],  # 189: l_outer_corner_of_mouth
+    [192, 0, 0],  # 190: center_of_cupid_bow
+    [192, 0, 0],  # 191: center_of_lower_outer_lip
+    [192, 0, 0],  # 192: midpoint_1_of_upper_outer_lip
+    [192, 0, 0],  # 193: midpoint_2_of_upper_outer_lip
+    [192, 0, 0],  # 194: midpoint_1_of_lower_outer_lip
+    [192, 0, 0],  # 195: midpoint_2_of_lower_outer_lip
+    [192, 0, 0],  # 196: midpoint_3_of_upper_outer_lip
+    [192, 0, 0],  # 197: midpoint_4_of_upper_outer_lip
+    [192, 0, 0],  # 198: midpoint_5_of_upper_outer_lip
+    [192, 0, 0],  # 199: midpoint_6_of_upper_outer_lip
+    [192, 0, 0],  # 200: midpoint_3_of_lower_outer_lip
+    [192, 0, 0],  # 201: midpoint_4_of_lower_outer_lip
+    [192, 0, 0],  # 202: midpoint_5_of_lower_outer_lip
+    [192, 0, 0],  # 203: midpoint_6_of_lower_outer_lip
+    [0, 192, 192],  # 204: r_inner_corner_of_mouth
+    [0, 192, 192],  # 205: l_inner_corner_of_mouth
+    [0, 192, 192],  # 206: center_of_upper_inner_lip
+    [0, 192, 192],  # 207: center_of_lower_inner_lip
+    [0, 192, 192],  # 208: midpoint_1_of_upper_inner_lip
+    [0, 192, 192],  # 209: midpoint_2_of_upper_inner_lip
+    [0, 192, 192],  # 210: midpoint_1_of_lower_inner_lip
+    [0, 192, 192],  # 211: midpoint_2_of_lower_inner_lip
+    [0, 192, 192],  # 212: midpoint_3_of_upper_inner_lip
+    [0, 192, 192],  # 213: midpoint_4_of_upper_inner_lip
+    [0, 192, 192],  # 214: midpoint_5_of_upper_inner_lip
+    [0, 192, 192],  # 215: midpoint_6_of_upper_inner_lip
+    [0, 192, 192],  # 216: midpoint_3_of_lower_inner_lip
+    [0, 192, 192],  # 217: midpoint_4_of_lower_inner_lip
+    [0, 192, 192],  # 218: midpoint_5_of_lower_inner_lip
+    [0, 192, 192],  # 219: midpoint_6_of_lower_inner_lip. teeth removed
+    [200, 200, 0],  # 256: l_top_end_of_inferior_crus
+    [200, 200, 0],  # 257: l_top_end_of_superior_crus
+    [200, 200, 0],  # 258: l_start_of_antihelix
+    [200, 200, 0],  # 259: l_end_of_antihelix
+    [200, 200, 0],  # 260: l_midpoint_1_of_antihelix
+    [200, 200, 0],  # 261: l_midpoint_1_of_inferior_crus
+    [200, 200, 0],  # 262: l_midpoint_2_of_antihelix
+    [200, 200, 0],  # 263: l_midpoint_3_of_antihelix
+    [200, 200, 0],  # 264: l_point_1_of_inner_helix
+    [200, 200, 0],  # 265: l_point_2_of_inner_helix
+    [200, 200, 0],  # 266: l_point_3_of_inner_helix
+    [200, 200, 0],  # 267: l_point_4_of_inner_helix
+    [200, 200, 0],  # 268: l_point_5_of_inner_helix
+    [200, 200, 0],  # 269: l_point_6_of_inner_helix
+    [200, 200, 0],  # 270: l_point_7_of_inner_helix
+    [200, 200, 0],  # 271: l_highest_point_of_antitragus
+    [200, 200, 0],  # 272: l_bottom_point_of_tragus
+    [200, 200, 0],  # 273: l_protruding_point_of_tragus
+    [200, 200, 0],  # 274: l_top_point_of_tragus
+    [200, 200, 0],  # 275: l_start_point_of_crus_of_helix
+    [200, 200, 0],  # 276: l_deepest_point_of_concha
+    [200, 200, 0],  # 277: l_tip_of_ear_lobe
+    [200, 200, 0],  # 278: l_midpoint_between_22_15
+    [200, 200, 0],  # 279: l_bottom_connecting_point_of_ear_lobe
+    [200, 200, 0],  # 280: l_top_connecting_point_of_helix
+    [200, 200, 0],  # 281: l_point_8_of_inner_helix
+    [0, 200, 200],  # 282: r_top_end_of_inferior_crus
+    [0, 200, 200],  # 283: r_top_end_of_superior_crus
+    [0, 200, 200],  # 284: r_start_of_antihelix
+    [0, 200, 200],  # 285: r_end_of_antihelix
+    [0, 200, 200],  # 286: r_midpoint_1_of_antihelix
+    [0, 200, 200],  # 287: r_midpoint_1_of_inferior_crus
+    [0, 200, 200],  # 288: r_midpoint_2_of_antihelix
+    [0, 200, 200],  # 289: r_midpoint_3_of_antihelix
+    [0, 200, 200],  # 290: r_point_1_of_inner_helix
+    [0, 200, 200],  # 291: r_point_8_of_inner_helix
+    [0, 200, 200],  # 292: r_point_3_of_inner_helix
+    [0, 200, 200],  # 293: r_point_4_of_inner_helix
+    [0, 200, 200],  # 294: r_point_5_of_inner_helix
+    [0, 200, 200],  # 295: r_point_6_of_inner_helix
+    [0, 200, 200],  # 296: r_point_7_of_inner_helix
+    [0, 200, 200],  # 297: r_highest_point_of_antitragus
+    [0, 200, 200],  # 298: r_bottom_point_of_tragus
+    [0, 200, 200],  # 299: r_protruding_point_of_tragus
+    [0, 200, 200],  # 300: r_top_point_of_tragus
+    [0, 200, 200],  # 301: r_start_point_of_crus_of_helix
+    [0, 200, 200],  # 302: r_deepest_point_of_concha
+    [0, 200, 200],  # 303: r_tip_of_ear_lobe
+    [0, 200, 200],  # 304: r_midpoint_between_22_15
+    [0, 200, 200],  # 305: r_bottom_connecting_point_of_ear_lobe
+    [0, 200, 200],  # 306: r_top_connecting_point_of_helix
+    [0, 200, 200],  # 307: r_point_2_of_inner_helix
+    [128, 192, 64],  # 308: l_center_of_iris
+    [128, 192, 64],  # 309: l_border_of_iris_3
+    [128, 192, 64],  # 310: l_border_of_iris_midpoint_1
+    [128, 192, 64],  # 311: l_border_of_iris_12
+    [128, 192, 64],  # 312: l_border_of_iris_midpoint_4
+    [128, 192, 64],  # 313: l_border_of_iris_9
+    [128, 192, 64],  # 314: l_border_of_iris_midpoint_3
+    [128, 192, 64],  # 315: l_border_of_iris_6
+    [128, 192, 64],  # 316: l_border_of_iris_midpoint_2
+    [192, 32, 64],  # 317: r_center_of_iris
+    [192, 32, 64],  # 318: r_border_of_iris_3
+    [192, 32, 64],  # 319: r_border_of_iris_midpoint_1
+    [192, 32, 64],  # 320: r_border_of_iris_12
+    [192, 32, 64],  # 321: r_border_of_iris_midpoint_4
+    [192, 32, 64],  # 322: r_border_of_iris_9
+    [192, 32, 64],  # 323: r_border_of_iris_midpoint_3
+    [192, 32, 64],  # 324: r_border_of_iris_6
+    [192, 32, 64],  # 325: r_border_of_iris_midpoint_2
+    [192, 128, 64],  # 326: l_center_of_pupil
+    [192, 128, 64],  # 327: l_border_of_pupil_3
+    [192, 128, 64],  # 328: l_border_of_pupil_midpoint_1
+    [192, 128, 64],  # 329: l_border_of_pupil_12
+    [192, 128, 64],  # 330: l_border_of_pupil_midpoint_4
+    [192, 128, 64],  # 331: l_border_of_pupil_9
+    [192, 128, 64],  # 332: l_border_of_pupil_midpoint_3
+    [192, 128, 64],  # 333: l_border_of_pupil_6
+    [192, 128, 64],  # 334: l_border_of_pupil_midpoint_2
+    [32, 192, 192],  # 335: r_center_of_pupil
+    [32, 192, 192],  # 336: r_border_of_pupil_3
+    [32, 192, 192],  # 337: r_border_of_pupil_midpoint_1
+    [32, 192, 192],  # 338: r_border_of_pupil_12
+    [32, 192, 192],  # 339: r_border_of_pupil_midpoint_4
+    [32, 192, 192],  # 340: r_border_of_pupil_9
+    [32, 192, 192],  # 341: r_border_of_pupil_midpoint_3
+    [32, 192, 192],  # 342: r_border_of_pupil_6
+    [32, 192, 192],  # 343: r_border_of_pupil_midpoint_2
+]
+
+GOLIATH_KEYPOINTS = [
+    "nose",
+    "left_eye",
+    "right_eye",
+    "left_ear",
+    "right_ear",
+    "left_shoulder",
+    "right_shoulder",
+    "left_elbow",
+    "right_elbow",
+    "left_hip",
+    "right_hip",
+    "left_knee",
+    "right_knee",
+    "left_ankle",
+    "right_ankle",
+    "left_big_toe",
+    "left_small_toe",
+    "left_heel",
+    "right_big_toe",
+    "right_small_toe",
+    "right_heel",
+    "right_thumb4",
+    "right_thumb3",
+    "right_thumb2",
+    "right_thumb_third_joint",
+    "right_forefinger4",
+    "right_forefinger3",
+    "right_forefinger2",
+    "right_forefinger_third_joint",
+    "right_middle_finger4",
+    "right_middle_finger3",
+    "right_middle_finger2",
+    "right_middle_finger_third_joint",
+    "right_ring_finger4",
+    "right_ring_finger3",
+    "right_ring_finger2",
+    "right_ring_finger_third_joint",
+    "right_pinky_finger4",
+    "right_pinky_finger3",
+    "right_pinky_finger2",
+    "right_pinky_finger_third_joint",
+    "right_wrist",
+    "left_thumb4",
+    "left_thumb3",
+    "left_thumb2",
+    "left_thumb_third_joint",
+    "left_forefinger4",
+    "left_forefinger3",
+    "left_forefinger2",
+    "left_forefinger_third_joint",
+    "left_middle_finger4",
+    "left_middle_finger3",
+    "left_middle_finger2",
+    "left_middle_finger_third_joint",
+    "left_ring_finger4",
+    "left_ring_finger3",
+    "left_ring_finger2",
+    "left_ring_finger_third_joint",
+    "left_pinky_finger4",
+    "left_pinky_finger3",
+    "left_pinky_finger2",
+    "left_pinky_finger_third_joint",
+    "left_wrist",
+    "left_olecranon",
+    "right_olecranon",
+    "left_cubital_fossa",
+    "right_cubital_fossa",
+    "left_acromion",
+    "right_acromion",
+    "neck",
+    "center_of_glabella",
+    "center_of_nose_root",
+    "tip_of_nose_bridge",
+    "midpoint_1_of_nose_bridge",
+    "midpoint_2_of_nose_bridge",
+    "midpoint_3_of_nose_bridge",
+    "center_of_labiomental_groove",
+    "tip_of_chin",
+    "upper_startpoint_of_r_eyebrow",
+    "lower_startpoint_of_r_eyebrow",
+    "end_of_r_eyebrow",
+    "upper_midpoint_1_of_r_eyebrow",
+    "lower_midpoint_1_of_r_eyebrow",
+    "upper_midpoint_2_of_r_eyebrow",
+    "upper_midpoint_3_of_r_eyebrow",
+    "lower_midpoint_2_of_r_eyebrow",
+    "lower_midpoint_3_of_r_eyebrow",
+    "upper_startpoint_of_l_eyebrow",
+    "lower_startpoint_of_l_eyebrow",
+    "end_of_l_eyebrow",
+    "upper_midpoint_1_of_l_eyebrow",
+    "lower_midpoint_1_of_l_eyebrow",
+    "upper_midpoint_2_of_l_eyebrow",
+    "upper_midpoint_3_of_l_eyebrow",
+    "lower_midpoint_2_of_l_eyebrow",
+    "lower_midpoint_3_of_l_eyebrow",
+    "l_inner_end_of_upper_lash_line",
+    "l_outer_end_of_upper_lash_line",
+    "l_centerpoint_of_upper_lash_line",
+    "l_midpoint_2_of_upper_lash_line",
+    "l_midpoint_1_of_upper_lash_line",
+    "l_midpoint_6_of_upper_lash_line",
+    "l_midpoint_5_of_upper_lash_line",
+    "l_midpoint_4_of_upper_lash_line",
+    "l_midpoint_3_of_upper_lash_line",
+    "l_outer_end_of_upper_eyelid_line",
+    "l_midpoint_6_of_upper_eyelid_line",
+    "l_midpoint_2_of_upper_eyelid_line",
+    "l_midpoint_5_of_upper_eyelid_line",
+    "l_centerpoint_of_upper_eyelid_line",
+    "l_midpoint_4_of_upper_eyelid_line",
+    "l_midpoint_1_of_upper_eyelid_line",
+    "l_midpoint_3_of_upper_eyelid_line",
+    "l_midpoint_6_of_upper_crease_line",
+    "l_midpoint_2_of_upper_crease_line",
+    "l_midpoint_5_of_upper_crease_line",
(diff view truncated here)
"right_thumb2",
|
494 |
+
"right_thumb_third_joint",
|
495 |
+
"right_forefinger4",
|
496 |
+
"right_forefinger3",
|
497 |
+
"right_forefinger2",
|
498 |
+
"right_forefinger_third_joint",
|
499 |
+
"right_middle_finger4",
|
500 |
+
"right_middle_finger3",
|
501 |
+
"right_middle_finger2",
|
502 |
+
"right_middle_finger_third_joint",
|
503 |
+
"right_ring_finger4",
|
504 |
+
"right_ring_finger3",
|
505 |
+
"right_ring_finger2",
|
506 |
+
"right_ring_finger_third_joint",
|
507 |
+
"right_pinky_finger4",
|
508 |
+
"right_pinky_finger3",
|
509 |
+
"right_pinky_finger2",
|
510 |
+
"right_pinky_finger_third_joint",
|
511 |
+
"right_wrist",
|
512 |
+
"left_thumb4",
|
513 |
+
"left_thumb3",
|
514 |
+
"left_thumb2",
|
515 |
+
"left_thumb_third_joint",
|
516 |
+
"left_forefinger4",
|
517 |
+
"left_forefinger3",
|
518 |
+
"left_forefinger2",
|
519 |
+
"left_forefinger_third_joint",
|
520 |
+
"left_middle_finger4",
|
521 |
+
"left_middle_finger3",
|
522 |
+
"left_middle_finger2",
|
523 |
+
"left_middle_finger_third_joint",
|
524 |
+
"left_ring_finger4",
|
525 |
+
"left_ring_finger3",
|
526 |
+
"left_ring_finger2",
|
527 |
+
"left_ring_finger_third_joint",
|
528 |
+
"left_pinky_finger4",
|
529 |
+
"left_pinky_finger3",
|
530 |
+
"left_pinky_finger2",
|
531 |
+
"left_pinky_finger_third_joint",
|
532 |
+
"left_wrist",
|
533 |
+
"left_olecranon",
|
534 |
+
"right_olecranon",
|
535 |
+
"left_cubital_fossa",
|
536 |
+
"right_cubital_fossa",
|
537 |
+
"left_acromion",
|
538 |
+
"right_acromion",
|
539 |
+
"neck",
|
540 |
+
"center_of_glabella",
|
541 |
+
"center_of_nose_root",
|
542 |
+
"tip_of_nose_bridge",
|
543 |
+
"midpoint_1_of_nose_bridge",
|
544 |
+
"midpoint_2_of_nose_bridge",
|
545 |
+
"midpoint_3_of_nose_bridge",
|
546 |
+
"center_of_labiomental_groove",
|
547 |
+
"tip_of_chin",
|
548 |
+
"upper_startpoint_of_r_eyebrow",
|
549 |
+
"lower_startpoint_of_r_eyebrow",
|
550 |
+
"end_of_r_eyebrow",
|
551 |
+
"upper_midpoint_1_of_r_eyebrow",
|
552 |
+
"lower_midpoint_1_of_r_eyebrow",
|
553 |
+
"upper_midpoint_2_of_r_eyebrow",
|
554 |
+
"upper_midpoint_3_of_r_eyebrow",
|
555 |
+
"lower_midpoint_2_of_r_eyebrow",
|
556 |
+
"lower_midpoint_3_of_r_eyebrow",
|
557 |
+
"upper_startpoint_of_l_eyebrow",
|
558 |
+
"lower_startpoint_of_l_eyebrow",
|
559 |
+
"end_of_l_eyebrow",
|
560 |
+
"upper_midpoint_1_of_l_eyebrow",
|
561 |
+
"lower_midpoint_1_of_l_eyebrow",
|
562 |
+
"upper_midpoint_2_of_l_eyebrow",
|
563 |
+
"upper_midpoint_3_of_l_eyebrow",
|
564 |
+
"lower_midpoint_2_of_l_eyebrow",
|
565 |
+
"lower_midpoint_3_of_l_eyebrow",
|
566 |
+
"l_inner_end_of_upper_lash_line",
|
567 |
+
"l_outer_end_of_upper_lash_line",
|
568 |
+
"l_centerpoint_of_upper_lash_line",
|
569 |
+
"l_midpoint_2_of_upper_lash_line",
|
570 |
+
"l_midpoint_1_of_upper_lash_line",
|
571 |
+
"l_midpoint_6_of_upper_lash_line",
|
572 |
+
"l_midpoint_5_of_upper_lash_line",
|
573 |
+
"l_midpoint_4_of_upper_lash_line",
|
574 |
+
"l_midpoint_3_of_upper_lash_line",
|
575 |
+
"l_outer_end_of_upper_eyelid_line",
|
576 |
+
"l_midpoint_6_of_upper_eyelid_line",
|
577 |
+
"l_midpoint_2_of_upper_eyelid_line",
|
578 |
+
"l_midpoint_5_of_upper_eyelid_line",
|
579 |
+
"l_centerpoint_of_upper_eyelid_line",
|
580 |
+
"l_midpoint_4_of_upper_eyelid_line",
|
581 |
+
"l_midpoint_1_of_upper_eyelid_line",
|
582 |
+
"l_midpoint_3_of_upper_eyelid_line",
|
583 |
+
"l_midpoint_6_of_upper_crease_line",
|
584 |
+
"l_midpoint_2_of_upper_crease_line",
|
585 |
+
"l_midpoint_5_of_upper_crease_line",
|
586 |
+
"l_centerpoint_of_upper_crease_line",
|
587 |
+
"l_midpoint_4_of_upper_crease_line",
|
588 |
+
"l_midpoint_1_of_upper_crease_line",
|
589 |
+
"l_midpoint_3_of_upper_crease_line",
|
590 |
+
"r_inner_end_of_upper_lash_line",
|
591 |
+
"r_outer_end_of_upper_lash_line",
|
592 |
+
"r_centerpoint_of_upper_lash_line",
|
593 |
+
"r_midpoint_1_of_upper_lash_line",
|
594 |
+
"r_midpoint_2_of_upper_lash_line",
|
595 |
+
"r_midpoint_3_of_upper_lash_line",
|
596 |
+
"r_midpoint_4_of_upper_lash_line",
|
597 |
+
"r_midpoint_5_of_upper_lash_line",
|
598 |
+
"r_midpoint_6_of_upper_lash_line",
|
599 |
+
"r_outer_end_of_upper_eyelid_line",
|
600 |
+
"r_midpoint_3_of_upper_eyelid_line",
|
601 |
+
"r_midpoint_1_of_upper_eyelid_line",
|
602 |
+
"r_midpoint_4_of_upper_eyelid_line",
|
603 |
+
"r_centerpoint_of_upper_eyelid_line",
|
604 |
+
"r_midpoint_5_of_upper_eyelid_line",
|
605 |
+
"r_midpoint_2_of_upper_eyelid_line",
|
606 |
+
"r_midpoint_6_of_upper_eyelid_line",
|
607 |
+
"r_midpoint_3_of_upper_crease_line",
|
608 |
+
"r_midpoint_1_of_upper_crease_line",
|
609 |
+
"r_midpoint_4_of_upper_crease_line",
|
610 |
+
"r_centerpoint_of_upper_crease_line",
|
611 |
+
"r_midpoint_5_of_upper_crease_line",
|
612 |
+
"r_midpoint_2_of_upper_crease_line",
|
613 |
+
"r_midpoint_6_of_upper_crease_line",
|
614 |
+
"l_inner_end_of_lower_lash_line",
|
615 |
+
"l_outer_end_of_lower_lash_line",
|
616 |
+
"l_centerpoint_of_lower_lash_line",
|
617 |
+
"l_midpoint_2_of_lower_lash_line",
|
618 |
+
"l_midpoint_1_of_lower_lash_line",
|
619 |
+
"l_midpoint_6_of_lower_lash_line",
|
620 |
+
"l_midpoint_5_of_lower_lash_line",
|
621 |
+
"l_midpoint_4_of_lower_lash_line",
|
622 |
+
"l_midpoint_3_of_lower_lash_line",
|
623 |
+
"l_outer_end_of_lower_eyelid_line",
|
624 |
+
"l_midpoint_6_of_lower_eyelid_line",
|
625 |
+
"l_midpoint_2_of_lower_eyelid_line",
|
626 |
+
"l_midpoint_5_of_lower_eyelid_line",
|
627 |
+
"l_centerpoint_of_lower_eyelid_line",
|
628 |
+
"l_midpoint_4_of_lower_eyelid_line",
|
629 |
+
"l_midpoint_1_of_lower_eyelid_line",
|
630 |
+
"l_midpoint_3_of_lower_eyelid_line",
|
631 |
+
"r_inner_end_of_lower_lash_line",
|
632 |
+
"r_outer_end_of_lower_lash_line",
|
633 |
+
"r_centerpoint_of_lower_lash_line",
|
634 |
+
"r_midpoint_1_of_lower_lash_line",
|
635 |
+
"r_midpoint_2_of_lower_lash_line",
|
636 |
+
"r_midpoint_3_of_lower_lash_line",
|
637 |
+
"r_midpoint_4_of_lower_lash_line",
|
638 |
+
"r_midpoint_5_of_lower_lash_line",
|
639 |
+
"r_midpoint_6_of_lower_lash_line",
|
640 |
+
"r_outer_end_of_lower_eyelid_line",
|
641 |
+
"r_midpoint_3_of_lower_eyelid_line",
|
642 |
+
"r_midpoint_1_of_lower_eyelid_line",
|
643 |
+
"r_midpoint_4_of_lower_eyelid_line",
|
644 |
+
"r_centerpoint_of_lower_eyelid_line",
|
645 |
+
"r_midpoint_5_of_lower_eyelid_line",
|
646 |
+
"r_midpoint_2_of_lower_eyelid_line",
|
647 |
+
"r_midpoint_6_of_lower_eyelid_line",
|
648 |
+
"tip_of_nose",
|
649 |
+
"bottom_center_of_nose",
|
650 |
+
"r_outer_corner_of_nose",
|
651 |
+
"l_outer_corner_of_nose",
|
652 |
+
"inner_corner_of_r_nostril",
|
653 |
+
"outer_corner_of_r_nostril",
|
654 |
+
"upper_corner_of_r_nostril",
|
655 |
+
"inner_corner_of_l_nostril",
|
656 |
+
"outer_corner_of_l_nostril",
|
657 |
+
"upper_corner_of_l_nostril",
|
658 |
+
"r_outer_corner_of_mouth",
|
659 |
+
"l_outer_corner_of_mouth",
|
660 |
+
"center_of_cupid_bow",
|
661 |
+
"center_of_lower_outer_lip",
|
662 |
+
"midpoint_1_of_upper_outer_lip",
|
663 |
+
"midpoint_2_of_upper_outer_lip",
|
664 |
+
"midpoint_1_of_lower_outer_lip",
|
665 |
+
"midpoint_2_of_lower_outer_lip",
|
666 |
+
"midpoint_3_of_upper_outer_lip",
|
667 |
+
"midpoint_4_of_upper_outer_lip",
|
668 |
+
"midpoint_5_of_upper_outer_lip",
|
669 |
+
"midpoint_6_of_upper_outer_lip",
|
670 |
+
"midpoint_3_of_lower_outer_lip",
|
671 |
+
"midpoint_4_of_lower_outer_lip",
|
672 |
+
"midpoint_5_of_lower_outer_lip",
|
673 |
+
"midpoint_6_of_lower_outer_lip",
|
674 |
+
"r_inner_corner_of_mouth",
|
675 |
+
"l_inner_corner_of_mouth",
|
676 |
+
"center_of_upper_inner_lip",
|
677 |
+
"center_of_lower_inner_lip",
|
678 |
+
"midpoint_1_of_upper_inner_lip",
|
679 |
+
"midpoint_2_of_upper_inner_lip",
|
680 |
+
"midpoint_1_of_lower_inner_lip",
|
681 |
+
"midpoint_2_of_lower_inner_lip",
|
682 |
+
"midpoint_3_of_upper_inner_lip",
|
683 |
+
"midpoint_4_of_upper_inner_lip",
|
684 |
+
"midpoint_5_of_upper_inner_lip",
|
685 |
+
"midpoint_6_of_upper_inner_lip",
|
686 |
+
"midpoint_3_of_lower_inner_lip",
|
687 |
+
"midpoint_4_of_lower_inner_lip",
|
688 |
+
"midpoint_5_of_lower_inner_lip",
|
689 |
+
"midpoint_6_of_lower_inner_lip",
|
690 |
+
"l_top_end_of_inferior_crus",
|
691 |
+
"l_top_end_of_superior_crus",
|
692 |
+
"l_start_of_antihelix",
|
693 |
+
"l_end_of_antihelix",
|
694 |
+
"l_midpoint_1_of_antihelix",
|
695 |
+
"l_midpoint_1_of_inferior_crus",
|
696 |
+
"l_midpoint_2_of_antihelix",
|
697 |
+
"l_midpoint_3_of_antihelix",
|
698 |
+
"l_point_1_of_inner_helix",
|
699 |
+
"l_point_2_of_inner_helix",
|
700 |
+
"l_point_3_of_inner_helix",
|
701 |
+
"l_point_4_of_inner_helix",
|
702 |
+
"l_point_5_of_inner_helix",
|
703 |
+
"l_point_6_of_inner_helix",
|
704 |
+
"l_point_7_of_inner_helix",
|
705 |
+
"l_highest_point_of_antitragus",
|
706 |
+
"l_bottom_point_of_tragus",
|
707 |
+
"l_protruding_point_of_tragus",
|
708 |
+
"l_top_point_of_tragus",
|
709 |
+
"l_start_point_of_crus_of_helix",
|
710 |
+
"l_deepest_point_of_concha",
|
711 |
+
"l_tip_of_ear_lobe",
|
712 |
+
"l_midpoint_between_22_15",
|
713 |
+
"l_bottom_connecting_point_of_ear_lobe",
|
714 |
+
"l_top_connecting_point_of_helix",
|
715 |
+
"l_point_8_of_inner_helix",
|
716 |
+
"r_top_end_of_inferior_crus",
|
717 |
+
"r_top_end_of_superior_crus",
|
718 |
+
"r_start_of_antihelix",
|
719 |
+
"r_end_of_antihelix",
|
720 |
+
"r_midpoint_1_of_antihelix",
|
721 |
+
"r_midpoint_1_of_inferior_crus",
|
722 |
+
"r_midpoint_2_of_antihelix",
|
723 |
+
"r_midpoint_3_of_antihelix",
|
724 |
+
"r_point_1_of_inner_helix",
|
725 |
+
"r_point_8_of_inner_helix",
|
726 |
+
"r_point_3_of_inner_helix",
|
727 |
+
"r_point_4_of_inner_helix",
|
728 |
+
"r_point_5_of_inner_helix",
|
729 |
+
"r_point_6_of_inner_helix",
|
730 |
+
"r_point_7_of_inner_helix",
|
731 |
+
"r_highest_point_of_antitragus",
|
732 |
+
"r_bottom_point_of_tragus",
|
733 |
+
"r_protruding_point_of_tragus",
|
734 |
+
"r_top_point_of_tragus",
|
735 |
+
"r_start_point_of_crus_of_helix",
|
736 |
+
"r_deepest_point_of_concha",
|
737 |
+
"r_tip_of_ear_lobe",
|
738 |
+
"r_midpoint_between_22_15",
|
739 |
+
"r_bottom_connecting_point_of_ear_lobe",
|
740 |
+
"r_top_connecting_point_of_helix",
|
741 |
+
"r_point_2_of_inner_helix",
|
742 |
+
"l_center_of_iris",
|
743 |
+
"l_border_of_iris_3",
|
744 |
+
"l_border_of_iris_midpoint_1",
|
745 |
+
"l_border_of_iris_12",
|
746 |
+
"l_border_of_iris_midpoint_4",
|
747 |
+
"l_border_of_iris_9",
|
748 |
+
"l_border_of_iris_midpoint_3",
|
749 |
+
"l_border_of_iris_6",
|
750 |
+
"l_border_of_iris_midpoint_2",
|
751 |
+
"r_center_of_iris",
|
752 |
+
"r_border_of_iris_3",
|
753 |
+
"r_border_of_iris_midpoint_1",
|
754 |
+
"r_border_of_iris_12",
|
755 |
+
"r_border_of_iris_midpoint_4",
|
756 |
+
"r_border_of_iris_9",
|
757 |
+
"r_border_of_iris_midpoint_3",
|
758 |
+
"r_border_of_iris_6",
|
759 |
+
"r_border_of_iris_midpoint_2",
|
760 |
+
"l_center_of_pupil",
|
761 |
+
"l_border_of_pupil_3",
|
762 |
+
"l_border_of_pupil_midpoint_1",
|
763 |
+
"l_border_of_pupil_12",
|
764 |
+
"l_border_of_pupil_midpoint_4",
|
765 |
+
"l_border_of_pupil_9",
|
766 |
+
"l_border_of_pupil_midpoint_3",
|
767 |
+
"l_border_of_pupil_6",
|
768 |
+
"l_border_of_pupil_midpoint_2",
|
769 |
+
"r_center_of_pupil",
|
770 |
+
"r_border_of_pupil_3",
|
771 |
+
"r_border_of_pupil_midpoint_1",
|
772 |
+
"r_border_of_pupil_12",
|
773 |
+
"r_border_of_pupil_midpoint_4",
|
774 |
+
"r_border_of_pupil_9",
|
775 |
+
"r_border_of_pupil_midpoint_3",
|
776 |
+
"r_border_of_pupil_6",
|
777 |
+
"r_border_of_pupil_midpoint_2"
|
778 |
+
]
|
779 |
+
|
780 |
+
GOLIATH_SKELETON_INFO = {
|
781 |
+
0:
|
782 |
+
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
|
783 |
+
1:
|
784 |
+
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
|
785 |
+
2:
|
786 |
+
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
|
787 |
+
3:
|
788 |
+
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
|
789 |
+
4:
|
790 |
+
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
|
791 |
+
5:
|
792 |
+
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
|
793 |
+
6:
|
794 |
+
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
|
795 |
+
7:
|
796 |
+
dict(
|
797 |
+
link=('left_shoulder', 'right_shoulder'),
|
798 |
+
id=7,
|
799 |
+
color=[51, 153, 255]),
|
800 |
+
8:
|
801 |
+
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
|
802 |
+
9:
|
803 |
+
dict(
|
804 |
+
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
|
805 |
+
10:
|
806 |
+
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
|
807 |
+
11:
|
808 |
+
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
|
809 |
+
12:
|
810 |
+
dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
|
811 |
+
13:
|
812 |
+
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
|
813 |
+
14:
|
814 |
+
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
|
815 |
+
15:
|
816 |
+
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
|
817 |
+
16:
|
818 |
+
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
|
819 |
+
17:
|
820 |
+
dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
|
821 |
+
18:
|
822 |
+
dict(
|
823 |
+
link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]),
|
824 |
+
19:
|
825 |
+
dict(link=('left_ankle', 'left_big_toe'), id=19, color=[0, 255, 0]),
|
826 |
+
20:
|
827 |
+
dict(link=('left_ankle', 'left_small_toe'), id=20, color=[0, 255, 0]),
|
828 |
+
21:
|
829 |
+
dict(link=('left_ankle', 'left_heel'), id=21, color=[0, 255, 0]),
|
830 |
+
22:
|
831 |
+
dict(
|
832 |
+
link=('right_ankle', 'right_big_toe'), id=22, color=[255, 128, 0]),
|
833 |
+
23:
|
834 |
+
dict(
|
835 |
+
link=('right_ankle', 'right_small_toe'),
|
836 |
+
id=23,
|
837 |
+
color=[255, 128, 0]),
|
838 |
+
24:
|
839 |
+
dict(link=('right_ankle', 'right_heel'), id=24, color=[255, 128, 0]),
|
840 |
+
25:
|
841 |
+
dict(
|
842 |
+
link=('left_wrist', 'left_thumb_third_joint'), id=25, color=[255, 128,
|
843 |
+
0]),
|
844 |
+
26:
|
845 |
+
dict(link=('left_thumb_third_joint', 'left_thumb2'), id=26, color=[255, 128, 0]),
|
846 |
+
27:
|
847 |
+
dict(link=('left_thumb2', 'left_thumb3'), id=27, color=[255, 128, 0]),
|
848 |
+
28:
|
849 |
+
dict(link=('left_thumb3', 'left_thumb4'), id=28, color=[255, 128, 0]),
|
850 |
+
29:
|
851 |
+
dict(
|
852 |
+
link=('left_wrist', 'left_forefinger_third_joint'),
|
853 |
+
id=29,
|
854 |
+
color=[255, 153, 255]),
|
855 |
+
30:
|
856 |
+
dict(
|
857 |
+
link=('left_forefinger_third_joint', 'left_forefinger2'),
|
858 |
+
id=30,
|
859 |
+
color=[255, 153, 255]),
|
860 |
+
31:
|
861 |
+
dict(
|
862 |
+
link=('left_forefinger2', 'left_forefinger3'),
|
863 |
+
id=31,
|
864 |
+
color=[255, 153, 255]),
|
865 |
+
32:
|
866 |
+
dict(
|
867 |
+
link=('left_forefinger3', 'left_forefinger4'),
|
868 |
+
id=32,
|
869 |
+
color=[255, 153, 255]),
|
870 |
+
33:
|
871 |
+
dict(
|
872 |
+
link=('left_wrist', 'left_middle_finger_third_joint'),
|
873 |
+
id=33,
|
874 |
+
color=[102, 178, 255]),
|
875 |
+
34:
|
876 |
+
dict(
|
877 |
+
link=('left_middle_finger_third_joint', 'left_middle_finger2'),
|
878 |
+
id=34,
|
879 |
+
color=[102, 178, 255]),
|
880 |
+
35:
|
881 |
+
dict(
|
882 |
+
link=('left_middle_finger2', 'left_middle_finger3'),
|
883 |
+
id=35,
|
884 |
+
color=[102, 178, 255]),
|
885 |
+
36:
|
886 |
+
dict(
|
887 |
+
link=('left_middle_finger3', 'left_middle_finger4'),
|
888 |
+
id=36,
|
889 |
+
color=[102, 178, 255]),
|
890 |
+
37:
|
891 |
+
dict(
|
892 |
+
link=('left_wrist', 'left_ring_finger_third_joint'),
|
893 |
+
id=37,
|
894 |
+
color=[255, 51, 51]),
|
895 |
+
38:
|
896 |
+
dict(
|
897 |
+
link=('left_ring_finger_third_joint', 'left_ring_finger2'),
|
898 |
+
id=38,
|
899 |
+
color=[255, 51, 51]),
|
900 |
+
39:
|
901 |
+
dict(
|
902 |
+
link=('left_ring_finger2', 'left_ring_finger3'),
|
903 |
+
id=39,
|
904 |
+
color=[255, 51, 51]),
|
905 |
+
40:
|
906 |
+
dict(
|
907 |
+
link=('left_ring_finger3', 'left_ring_finger4'),
|
908 |
+
id=40,
|
909 |
+
color=[255, 51, 51]),
|
910 |
+
41:
|
911 |
+
dict(
|
912 |
+
link=('left_wrist', 'left_pinky_finger_third_joint'),
|
913 |
+
id=41,
|
914 |
+
color=[0, 255, 0]),
|
915 |
+
42:
|
916 |
+
dict(
|
917 |
+
link=('left_pinky_finger_third_joint', 'left_pinky_finger2'),
|
918 |
+
id=42,
|
919 |
+
color=[0, 255, 0]),
|
920 |
+
43:
|
921 |
+
dict(
|
922 |
+
link=('left_pinky_finger2', 'left_pinky_finger3'),
|
923 |
+
id=43,
|
924 |
+
color=[0, 255, 0]),
|
925 |
+
44:
|
926 |
+
dict(
|
927 |
+
link=('left_pinky_finger3', 'left_pinky_finger4'),
|
928 |
+
id=44,
|
929 |
+
color=[0, 255, 0]),
|
930 |
+
45:
|
931 |
+
dict(
|
932 |
+
link=('right_wrist', 'right_thumb_third_joint'),
|
933 |
+
id=45,
|
934 |
+
color=[255, 128, 0]),
|
935 |
+
46:
|
936 |
+
dict(
|
937 |
+
link=('right_thumb_third_joint', 'right_thumb2'), id=46, color=[255, 128, 0]),
|
938 |
+
47:
|
939 |
+
dict(
|
940 |
+
link=('right_thumb2', 'right_thumb3'), id=47, color=[255, 128, 0]),
|
941 |
+
48:
|
942 |
+
dict(
|
943 |
+
link=('right_thumb3', 'right_thumb4'), id=48, color=[255, 128, 0]),
|
944 |
+
49:
|
945 |
+
dict(
|
946 |
+
link=('right_wrist', 'right_forefinger_third_joint'),
|
947 |
+
id=49,
|
948 |
+
color=[255, 153, 255]),
|
949 |
+
50:
|
950 |
+
dict(
|
951 |
+
link=('right_forefinger_third_joint', 'right_forefinger2'),
|
952 |
+
id=50,
|
953 |
+
color=[255, 153, 255]),
|
954 |
+
51:
|
955 |
+
dict(
|
956 |
+
link=('right_forefinger2', 'right_forefinger3'),
|
957 |
+
id=51,
|
958 |
+
color=[255, 153, 255]),
|
959 |
+
52:
|
960 |
+
dict(
|
961 |
+
link=('right_forefinger3', 'right_forefinger4'),
|
962 |
+
id=52,
|
963 |
+
color=[255, 153, 255]),
|
964 |
+
53:
|
965 |
+
dict(
|
966 |
+
link=('right_wrist', 'right_middle_finger_third_joint'),
|
967 |
+
id=53,
|
968 |
+
color=[102, 178, 255]),
|
969 |
+
54:
|
970 |
+
dict(
|
971 |
+
link=('right_middle_finger_third_joint', 'right_middle_finger2'),
|
972 |
+
id=54,
|
973 |
+
color=[102, 178, 255]),
|
974 |
+
55:
|
975 |
+
dict(
|
976 |
+
link=('right_middle_finger2', 'right_middle_finger3'),
|
977 |
+
id=55,
|
978 |
+
color=[102, 178, 255]),
|
979 |
+
56:
|
980 |
+
dict(
|
981 |
+
link=('right_middle_finger3', 'right_middle_finger4'),
|
982 |
+
id=56,
|
983 |
+
color=[102, 178, 255]),
|
984 |
+
57:
|
985 |
+
dict(
|
986 |
+
link=('right_wrist', 'right_ring_finger_third_joint'),
|
987 |
+
id=57,
|
988 |
+
color=[255, 51, 51]),
|
989 |
+
58:
|
990 |
+
dict(
|
991 |
+
link=('right_ring_finger_third_joint', 'right_ring_finger2'),
|
992 |
+
id=58,
|
993 |
+
color=[255, 51, 51]),
|
994 |
+
59:
|
995 |
+
dict(
|
996 |
+
link=('right_ring_finger2', 'right_ring_finger3'),
|
997 |
+
id=59,
|
998 |
+
color=[255, 51, 51]),
|
999 |
+
60:
|
1000 |
+
dict(
|
1001 |
+
link=('right_ring_finger3', 'right_ring_finger4'),
|
1002 |
+
id=60,
|
1003 |
+
color=[255, 51, 51]),
|
1004 |
+
61:
|
1005 |
+
dict(
|
1006 |
+
link=('right_wrist', 'right_pinky_finger_third_joint'),
|
1007 |
+
id=61,
|
1008 |
+
color=[0, 255, 0]),
|
1009 |
+
62:
|
1010 |
+
dict(
|
1011 |
+
link=('right_pinky_finger_third_joint', 'right_pinky_finger2'),
|
1012 |
+
id=62,
|
1013 |
+
color=[0, 255, 0]),
|
1014 |
+
63:
|
1015 |
+
dict(
|
1016 |
+
link=('right_pinky_finger2', 'right_pinky_finger3'),
|
1017 |
+
id=63,
|
1018 |
+
color=[0, 255, 0]),
|
1019 |
+
64:
|
1020 |
+
dict(
|
1021 |
+
link=('right_pinky_finger3', 'right_pinky_finger4'),
|
1022 |
+
id=64,
|
1023 |
+
color=[0, 255, 0])
|
1024 |
+
}
|
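For orientation, the two tables above are meant to be used together: every link in GOLIATH_SKELETON_INFO names two entries of GOLIATH_KEYPOINTS, so drawing a skeleton reduces to name-to-index lookups. A minimal sketch (my own, not part of this commit), assuming a hypothetical `keypoints` array of shape (len(GOLIATH_KEYPOINTS), 2) with predicted (x, y) coordinates and assuming the color triplets are RGB:

import cv2
import numpy as np
from classes_and_palettes import GOLIATH_KEYPOINTS, GOLIATH_SKELETON_INFO

NAME_TO_INDEX = {name: i for i, name in enumerate(GOLIATH_KEYPOINTS)}

def draw_goliath_skeleton(image: np.ndarray, keypoints: np.ndarray) -> np.ndarray:
    # `keypoints` is a hypothetical (344, 2) array aligned with GOLIATH_KEYPOINTS.
    for info in GOLIATH_SKELETON_INFO.values():
        src, dst = info['link']
        p1 = tuple(int(v) for v in keypoints[NAME_TO_INDEX[src]])
        p2 = tuple(int(v) for v in keypoints[NAME_TO_INDEX[dst]])
        r, g, b = info['color']  # assumed RGB in the tables above
        cv2.line(image, p1, p2, (b, g, r), thickness=2)  # cv2 expects BGR
    return image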
detector_utils.py
ADDED
@@ -0,0 +1,196 @@
+from typing import List, Optional, Sequence, Union
+
+import torch
+import cv2
+import numpy as np
+from mmcv.ops import RoIPool
+from mmengine.dataset import Compose, pseudo_collate
+from mmengine.device import get_device
+from mmengine.registry import init_default_scope
+from mmdet.apis import inference_detector, init_detector
+from mmdet.structures import DetDataSample, SampleList
+from mmdet.utils import get_test_pipeline_cfg
+
+
+ImagesType = Union[str, np.ndarray, Sequence[str], Sequence[np.ndarray]]
+
+def nms(dets: np.ndarray, thr: float):
+    """Greedily select boxes with high confidence and overlap <= thr.
+
+    Args:
+        dets (np.ndarray): [[x1, y1, x2, y2, score]].
+        thr (float): Retain overlap < thr.
+
+    Returns:
+        list: Indexes to keep.
+    """
+    if len(dets) == 0:
+        return []
+
+    x1 = dets[:, 0]
+    y1 = dets[:, 1]
+    x2 = dets[:, 2]
+    y2 = dets[:, 3]
+    scores = dets[:, 4]
+
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while len(order) > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+        inds = np.where(ovr <= thr)[0]
+        order = order[inds + 1]
+
+    return keep
+
+def adapt_mmdet_pipeline(cfg):
+    """Converts pipeline types in MMDetection's test dataloader to use the
+    'mmdet' namespace.
+
+    Args:
+        cfg (ConfigDict): Configuration dictionary for MMDetection.
+
+    Returns:
+        ConfigDict: Configuration dictionary with updated pipeline types.
+    """
+    # use lazy import to avoid hard dependence on mmdet
+    from mmdet.datasets import transforms
+
+    if 'test_dataloader' not in cfg:
+        return cfg
+
+    pipeline = cfg.test_dataloader.dataset.pipeline
+    for trans in pipeline:
+        if trans['type'] in dir(transforms):
+            trans['type'] = 'mmdet.' + trans['type']
+
+    return cfg
+
+
+# Note: this local definition intentionally shadows the `inference_detector`
+# imported from mmdet.apis above; it adds tensor-input handling and
+# bfloat16 autocast around the forward pass.
+def inference_detector(
+    model: torch.nn.Module,
+    imgs: ImagesType,
+    test_pipeline: Optional[Compose] = None,
+    text_prompt: Optional[str] = None,
+    custom_entities: bool = False,
+) -> Union[DetDataSample, SampleList]:
+    """Inference image(s) with the detector.
+
+    Args:
+        model (nn.Module): The loaded detector.
+        imgs (str, ndarray, Sequence[str/ndarray]):
+            Either image files or loaded images.
+        test_pipeline (:obj:`Compose`): Test pipeline.
+
+    Returns:
+        :obj:`DetDataSample` or list[:obj:`DetDataSample`]:
+        If imgs is a list or tuple, the same length list type results
+        will be returned, otherwise return the detection results directly.
+    """
+    if isinstance(imgs, torch.Tensor):
+        if imgs.is_cuda:
+            imgs = imgs.cpu()
+
+        # Remove batch dimension and transpose
+        imgs = imgs.squeeze(0).permute(1, 2, 0).numpy()
+
+        # Ensure the data type is appropriate (uint8 for most image processing functions)
+        imgs = (imgs * 255).astype(np.uint8)
+
+    if isinstance(imgs, (list, tuple)) or (isinstance(imgs, np.ndarray) and len(imgs.shape) == 4):
+        is_batch = True
+    else:
+        imgs = [imgs]
+        is_batch = False
+
+    cfg = model.cfg
+
+    if test_pipeline is None:
+        cfg = cfg.copy()
+        test_pipeline = get_test_pipeline_cfg(cfg)
+        if isinstance(imgs[0], np.ndarray):
+            # Calling this method across libraries will result
+            # in module unregistered error if not prefixed with mmdet.
+            test_pipeline[0].type = "mmdet.LoadImageFromNDArray"
+
+        test_pipeline = Compose(test_pipeline)
+
+    if model.data_preprocessor.device.type == "cpu":
+        for m in model.modules():
+            assert not isinstance(
+                m, RoIPool
+            ), "CPU inference with RoIPool is not supported currently."
+
+    result_list = []
+    for i, img in enumerate(imgs):
+        # prepare data
+        if isinstance(img, np.ndarray):
+            # TODO: remove img_id.
+            data_ = dict(img=img, img_id=0)
+        else:
+            # TODO: remove img_id.
+            data_ = dict(img_path=img, img_id=0)
+
+        if text_prompt:
+            data_["text"] = text_prompt
+            data_["custom_entities"] = custom_entities
+
+        # build the data pipeline
+        data_ = test_pipeline(data_)
+
+        data_["inputs"] = [data_["inputs"]]
+        data_["data_samples"] = [data_["data_samples"]]
+
+        # forward the model
+        with torch.no_grad(), torch.autocast(device_type=get_device(), dtype=torch.bfloat16):
+            results = model.test_step(data_)[0]
+
+        result_list.append(results)
+
+    if not is_batch:
+        return result_list[0]
+    else:
+        return result_list
+
+
+def process_one_image_bbox(pred_instance, det_cat_id, bbox_thr, nms_thr):
+    bboxes = np.concatenate(
+        (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1
+    )
+    bboxes = bboxes[
+        np.logical_and(
+            pred_instance.labels == det_cat_id,
+            pred_instance.scores > bbox_thr,
+        )
+    ]
+    bboxes = bboxes[nms(bboxes, nms_thr), :4]
+    return bboxes
+
+
+def process_images_detector(imgs, detector):
+    """Detect person bounding boxes in a batch of images."""
+    # predict bbox
+    det_results = inference_detector(detector, imgs)
+    pred_instances = list(
+        map(lambda det_result: det_result.pred_instances.numpy(), det_results)
+    )
+    bboxes_batch = list(
+        map(
+            lambda pred_instance: process_one_image_bbox(
+                pred_instance, 0, 0.3, 0.3  # det_cat_id=0, bbox_thr=0.3, nms_thr=0.3
+            ),
+            pred_instances,
+        )
+    )
+
+    return bboxes_batch
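A usage sketch for the helpers above (my own wiring; the config and checkpoint paths are guessed from this commit's assets/ directory rather than taken from app.py): init_detector loads the RTMDet person detector, adapt_mmdet_pipeline fixes the pipeline namespace, and process_images_detector returns one (N, 4) xyxy array per input image.

import cv2
from mmdet.apis import init_detector
from detector_utils import adapt_mmdet_pipeline, process_images_detector

# Paths and device are assumptions for illustration only.
detector = init_detector(
    'assets/rtmdet_m_640-8xb32_coco-person_no_nms.py',
    'assets/checkpoints/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth',
    device='cuda:0')
detector.cfg = adapt_mmdet_pipeline(detector.cfg)

img = cv2.imread('assets/images/68204.png')
(bboxes,) = process_images_detector([img], detector)
print(bboxes.shape)  # (num_person_boxes, 4), xyxy format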
external/cv/.gitignore
ADDED
@@ -0,0 +1,125 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# PyTorch checkpoint
+*.pth
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+#dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+mlu-ops/
+mlu-ops.*
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/en/_build/
+docs/en/api/generated/
+docs/zh_cn/_build/
+docs/zh_cn/api/generated/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# editors and IDEs
+.idea/
+.vscode/
+
+# custom
+.DS_Store
+
+# datasets and logs and checkpoints
+data/
+work_dir/
+
+src/
external/cv/MANIFEST.in
ADDED
@@ -0,0 +1,6 @@
+include requirements/runtime.txt
+include mmcv/ops/csrc/common/cuda/*.cuh mmcv/ops/csrc/common/cuda/*.hpp mmcv/ops/csrc/common/*.hpp
+include mmcv/ops/csrc/pytorch/*.cpp mmcv/ops/csrc/pytorch/cuda/*.cu mmcv/ops/csrc/pytorch/cuda/*.cpp mmcv/ops/csrc/pytorch/cpu/*.cpp
+include mmcv/ops/csrc/parrots/*.h mmcv/ops/csrc/parrots/*.cpp
+include mmcv/ops/csrc/pytorch/mps/*.mm mmcv/ops/csrc/common/mps/*.h mmcv/ops/csrc/common/mps/*.mm
+recursive-include mmcv/ops/csrc/ *.h *.hpp *.cpp *.cuh *.cu *.mm
external/cv/dist/sapiens_cv-1.0.0-cp310-cp310-linux_x86_64.whl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:746f2be13eefdfe43a59d9c415e03a4b0b922e6ce487b76a572a376ae76c9300
+size 30006791
external/cv/mmcv/__init__.py
ADDED
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+# flake8: noqa
+from .arraymisc import *
+from .image import *
+from .transforms import *
+from .version import *
+from .video import *
+from .visualization import *
+
+# The following modules are not imported to this level, so mmcv may be used
+# without PyTorch.
+# - op
+# - utils
external/cv/mmcv/arraymisc/__init__.py
ADDED
@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .quantization import dequantize, quantize
+
+__all__ = ['quantize', 'dequantize']
external/cv/mmcv/arraymisc/quantization.py
ADDED
@@ -0,0 +1,70 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Union
+
+import numpy as np
+
+
+def quantize(arr: np.ndarray,
+             min_val: Union[int, float],
+             max_val: Union[int, float],
+             levels: int,
+             dtype=np.int64) -> np.ndarray:
+    """Quantize an array of (-inf, inf) to [0, levels-1].
+
+    Args:
+        arr (ndarray): Input array.
+        min_val (int or float): Minimum value to be clipped.
+        max_val (int or float): Maximum value to be clipped.
+        levels (int): Quantization levels.
+        dtype (np.type): The type of the quantized array.
+
+    Returns:
+        np.ndarray: Quantized array.
+    """
+    if not (isinstance(levels, int) and levels > 1):
+        raise ValueError(
+            f'levels must be an integer greater than 1, but got {levels}')
+    if min_val >= max_val:
+        raise ValueError(
+            f'min_val ({min_val}) must be smaller than max_val ({max_val})')
+
+    arr = np.clip(arr, min_val, max_val) - min_val
+    quantized_arr = np.minimum(
+        np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1)
+
+    return quantized_arr
+
+
+def dequantize(arr: np.ndarray,
+               min_val: Union[int, float],
+               max_val: Union[int, float],
+               levels: int,
+               dtype=np.float64) -> np.ndarray:
+    """Dequantize an array.
+
+    Args:
+        arr (ndarray): Input array.
+        min_val (int or float): Minimum value to be clipped.
+        max_val (int or float): Maximum value to be clipped.
+        levels (int): Quantization levels.
+        dtype (np.type): The type of the dequantized array.
+
+    Returns:
+        np.ndarray: Dequantized array.
+    """
+    if not (isinstance(levels, int) and levels > 1):
+        raise ValueError(
+            f'levels must be an integer greater than 1, but got {levels}')
+    if min_val >= max_val:
+        raise ValueError(
+            f'min_val ({min_val}) must be smaller than max_val ({max_val})')
+
+    dequantized_arr = (arr + 0.5).astype(dtype) * (max_val -
+                                                   min_val) / levels + min_val
+
+    return dequantized_arr
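A quick round trip through the two functions above (my own example, not from the repo): quantize maps the clipped range [min_val, max_val] onto integer bins 0..levels-1, and dequantize returns bin centers, so the reconstruction error is at most half a bin width.

import numpy as np
from mmcv.arraymisc import quantize, dequantize

arr = np.array([-1.2, 0.0, 0.3, 0.97])
q = quantize(arr, min_val=-1.0, max_val=1.0, levels=16)    # ints in [0, 15]
rec = dequantize(q, min_val=-1.0, max_val=1.0, levels=16)  # bin centers
bin_width = 2.0 / 16
assert np.all(np.abs(np.clip(arr, -1.0, 1.0) - rec) <= bin_width / 2 + 1e-9)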
external/cv/mmcv/cnn/__init__.py
ADDED
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .alexnet import AlexNet
+# yapf: disable
+from .bricks import (ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule,
+                     ConvTranspose2d, ConvTranspose3d, ConvWS2d,
+                     DepthwiseSeparableConvModule, GeneralizedAttention,
+                     HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d,
+                     NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish,
+                     build_activation_layer, build_conv_layer,
+                     build_norm_layer, build_padding_layer, build_plugin_layer,
+                     build_upsample_layer, conv_ws_2d, is_norm)
+# yapf: enable
+from .resnet import ResNet, make_res_layer
+from .rfsearch import Conv2dRFSearchOp, RFSearchHook
+from .utils import fuse_conv_bn, get_model_complexity_info
+from .vgg import VGG, make_vgg_layer
+
+__all__ = [
+    'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer',
+    'ConvModule', 'build_activation_layer', 'build_conv_layer',
+    'build_norm_layer', 'build_padding_layer', 'build_upsample_layer',
+    'build_plugin_layer', 'is_norm', 'NonLocal1d', 'NonLocal2d', 'NonLocal3d',
+    'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', 'GeneralizedAttention',
+    'Scale', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d',
+    'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', 'ConvTranspose2d',
+    'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'fuse_conv_bn',
+    'get_model_complexity_info', 'Conv2dRFSearchOp', 'RFSearchHook'
+]
external/cv/mmcv/cnn/alexnet.py
ADDED
@@ -0,0 +1,68 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+from typing import Optional
+
+import torch
+import torch.nn as nn
+from mmengine.runner import load_checkpoint
+
+
+class AlexNet(nn.Module):
+    """AlexNet backbone.
+
+    Args:
+        num_classes (int): number of classes for classification.
+    """
+
+    def __init__(self, num_classes: int = -1):
+        super().__init__()
+        self.num_classes = num_classes
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(64, 192, kernel_size=5, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(192, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        )
+        if self.num_classes > 0:
+            self.classifier = nn.Sequential(
+                nn.Dropout(),
+                nn.Linear(256 * 6 * 6, 4096),
+                nn.ReLU(inplace=True),
+                nn.Dropout(),
+                nn.Linear(4096, 4096),
+                nn.ReLU(inplace=True),
+                nn.Linear(4096, num_classes),
+            )
+
+    def init_weights(self, pretrained: Optional[str] = None) -> None:
+        if isinstance(pretrained, str):
+            logger = logging.getLogger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif pretrained is None:
+            # use default initializer
+            pass
+        else:
+            raise TypeError('pretrained must be a str or None')
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+
+        x = self.features(x)
+        if self.num_classes > 0:
+            x = x.view(x.size(0), 256 * 6 * 6)
+            x = self.classifier(x)
+
+        return x
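A shape-only smoke test (my own example): with num_classes <= 0 the module returns convolutional features; with num_classes > 0 it assumes 224x224 inputs so the flattened features match the 256 * 6 * 6 classifier input.

import torch
from mmcv.cnn import AlexNet

backbone = AlexNet()                      # num_classes=-1: features only
feats = backbone(torch.randn(1, 3, 224, 224))
print(feats.shape)                        # torch.Size([1, 256, 6, 6])

classifier = AlexNet(num_classes=10)
logits = classifier(torch.randn(1, 3, 224, 224))
print(logits.shape)                       # torch.Size([1, 10])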
external/cv/mmcv/cnn/bricks/__init__.py
ADDED
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .activation import build_activation_layer
+from .context_block import ContextBlock
+from .conv import build_conv_layer
+from .conv2d_adaptive_padding import Conv2dAdaptivePadding
+from .conv_module import ConvModule
+from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d
+from .depthwise_separable_conv_module import DepthwiseSeparableConvModule
+from .drop import Dropout, DropPath
+from .generalized_attention import GeneralizedAttention
+from .hsigmoid import HSigmoid
+from .hswish import HSwish
+from .non_local import NonLocal1d, NonLocal2d, NonLocal3d
+from .norm import build_norm_layer, is_norm
+from .padding import build_padding_layer
+from .plugin import build_plugin_layer
+from .scale import LayerScale, Scale
+from .swish import Swish
+from .upsample import build_upsample_layer
+from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d,
+                       Linear, MaxPool2d, MaxPool3d)
+
+__all__ = [
+    'ConvModule', 'build_activation_layer', 'build_conv_layer',
+    'build_norm_layer', 'build_padding_layer', 'build_upsample_layer',
+    'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d',
+    'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention',
+    'Scale', 'ConvAWS2d', 'ConvWS2d', 'conv_ws_2d',
+    'DepthwiseSeparableConvModule', 'Swish', 'Linear', 'Conv2dAdaptivePadding',
+    'Conv2d', 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d',
+    'Conv3d', 'Dropout', 'DropPath', 'LayerScale'
+]
external/cv/mmcv/cnn/bricks/activation.py
ADDED
@@ -0,0 +1,119 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmengine.registry import MODELS
+from mmengine.utils import digit_version
+from mmengine.utils.dl_utils import TORCH_VERSION
+
+for module in [
+        nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU,
+        nn.Sigmoid, nn.Tanh
+]:
+    MODELS.register_module(module=module)
+
+if digit_version(torch.__version__) >= digit_version('1.7.0'):
+    MODELS.register_module(module=nn.SiLU, name='SiLU')
+else:
+
+    class SiLU(nn.Module):
+        """Sigmoid Weighted Linear Unit."""
+
+        def __init__(self, inplace=False):
+            super().__init__()
+            self.inplace = inplace
+
+        def forward(self, inputs) -> torch.Tensor:
+            if self.inplace:
+                return inputs.mul_(torch.sigmoid(inputs))
+            else:
+                return inputs * torch.sigmoid(inputs)
+
+    MODELS.register_module(module=SiLU, name='SiLU')
+
+
+@MODELS.register_module(name='Clip')
+@MODELS.register_module()
+class Clamp(nn.Module):
+    """Clamp activation layer.
+
+    This activation function is to clamp the feature map value within
+    :math:`[min, max]`. More details can be found in ``torch.clamp()``.
+
+    Args:
+        min (Number | optional): Lower-bound of the range to be clamped to.
+            Default to -1.
+        max (Number | optional): Upper-bound of the range to be clamped to.
+            Default to 1.
+    """
+
+    def __init__(self, min: float = -1., max: float = 1.):
+        super().__init__()
+        self.min = min
+        self.max = max
+
+    def forward(self, x) -> torch.Tensor:
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): The input tensor.
+
+        Returns:
+            torch.Tensor: Clamped tensor.
+        """
+        return torch.clamp(x, min=self.min, max=self.max)
+
+
+class GELU(nn.Module):
+    r"""Applies the Gaussian Error Linear Units function:
+
+    .. math::
+        \text{GELU}(x) = x * \Phi(x)
+
+    where :math:`\Phi(x)` is the Cumulative Distribution Function for
+    Gaussian Distribution.
+
+    Shape:
+        - Input: :math:`(N, *)` where `*` means, any number of additional
+          dimensions
+        - Output: :math:`(N, *)`, same shape as the input
+
+    .. image:: scripts/activation_images/GELU.png
+
+    Examples::
+
+        >>> m = nn.GELU()
+        >>> input = torch.randn(2)
+        >>> output = m(input)
+    """
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return F.gelu(input)
+
+
+if (TORCH_VERSION == 'parrots'
+        or digit_version(TORCH_VERSION) < digit_version('1.4')):
+    MODELS.register_module(module=GELU)
+else:
+    MODELS.register_module(module=nn.GELU)
+
+
+def build_activation_layer(cfg: Dict) -> nn.Module:
+    """Build activation layer.
+
+    Args:
+        cfg (dict): The activation layer config, which should contain:
+
+            - type (str): Layer type.
+            - layer args: Args needed to instantiate an activation layer.
+
+    Returns:
+        nn.Module: Created activation layer.
+    """
+    return MODELS.build(cfg)
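For illustration (my own example), activations are built from config dicts via the registry entries populated above; string types resolve to the registered classes, including the 'Clip' alias for Clamp.

import torch
from mmcv.cnn import build_activation_layer

relu = build_activation_layer(dict(type='ReLU', inplace=True))
clip = build_activation_layer(dict(type='Clip', min=0.0, max=6.0))

x = torch.linspace(-2.0, 8.0, steps=6)
print(relu(x.clone()))  # negatives zeroed
print(clip(x))          # values clamped to [0, 6]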
external/cv/mmcv/cnn/bricks/context_block.py
ADDED
@@ -0,0 +1,131 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Union
+
+import torch
+from mmengine.model import constant_init, kaiming_init
+from mmengine.registry import MODELS
+from torch import nn
+
+
+def last_zero_init(m: Union[nn.Module, nn.Sequential]) -> None:
+    if isinstance(m, nn.Sequential):
+        constant_init(m[-1], val=0)
+    else:
+        constant_init(m, val=0)
+
+
+@MODELS.register_module()
+class ContextBlock(nn.Module):
+    """ContextBlock module in GCNet.
+
+    See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond'
+    (https://arxiv.org/abs/1904.11492) for details.
+
+    Args:
+        in_channels (int): Channels of the input feature map.
+        ratio (float): Ratio of channels of transform bottleneck.
+        pooling_type (str): Pooling method for context modeling.
+            Options are 'att' and 'avg', stand for attention pooling and
+            average pooling respectively. Default: 'att'.
+        fusion_types (Sequence[str]): Fusion method for feature fusion,
+            Options are 'channel_add', 'channel_mul', stand for channelwise
+            addition and multiplication respectively. Default: ('channel_add',)
+    """
+
+    _abbr_ = 'context_block'
+
+    def __init__(self,
+                 in_channels: int,
+                 ratio: float,
+                 pooling_type: str = 'att',
+                 fusion_types: tuple = ('channel_add', )):
+        super().__init__()
+        assert pooling_type in ['avg', 'att']
+        assert isinstance(fusion_types, (list, tuple))
+        valid_fusion_types = ['channel_add', 'channel_mul']
+        assert all([f in valid_fusion_types for f in fusion_types])
+        assert len(fusion_types) > 0, 'at least one fusion should be used'
+        self.in_channels = in_channels
+        self.ratio = ratio
+        self.planes = int(in_channels * ratio)
+        self.pooling_type = pooling_type
+        self.fusion_types = fusion_types
+        if pooling_type == 'att':
+            self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1)
+            self.softmax = nn.Softmax(dim=2)
+        else:
+            self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        if 'channel_add' in fusion_types:
+            self.channel_add_conv = nn.Sequential(
+                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
+                nn.LayerNorm([self.planes, 1, 1]),
+                nn.ReLU(inplace=True),  # yapf: disable
+                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
+        else:
+            self.channel_add_conv = None
+        if 'channel_mul' in fusion_types:
+            self.channel_mul_conv = nn.Sequential(
+                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
+                nn.LayerNorm([self.planes, 1, 1]),
+                nn.ReLU(inplace=True),  # yapf: disable
+                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
+        else:
+            self.channel_mul_conv = None
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        if self.pooling_type == 'att':
+            kaiming_init(self.conv_mask, mode='fan_in')
+            self.conv_mask.inited = True
+
+        if self.channel_add_conv is not None:
+            last_zero_init(self.channel_add_conv)
+        if self.channel_mul_conv is not None:
+            last_zero_init(self.channel_mul_conv)
+
+    def spatial_pool(self, x: torch.Tensor) -> torch.Tensor:
+        batch, channel, height, width = x.size()
+        if self.pooling_type == 'att':
+            input_x = x
+            # [N, C, H * W]
+            input_x = input_x.view(batch, channel, height * width)
+            # [N, 1, C, H * W]
+            input_x = input_x.unsqueeze(1)
+            # [N, 1, H, W]
+            context_mask = self.conv_mask(x)
+            # [N, 1, H * W]
+            context_mask = context_mask.view(batch, 1, height * width)
+            # [N, 1, H * W]
+            context_mask = self.softmax(context_mask)
+            # [N, 1, H * W, 1]
+            context_mask = context_mask.unsqueeze(-1)
+            # [N, 1, C, 1]
+            context = torch.matmul(input_x, context_mask)
+            # [N, C, 1, 1]
+            context = context.view(batch, channel, 1, 1)
+        else:
+            # [N, C, 1, 1]
+            context = self.avg_pool(x)
+
+        return context
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # [N, C, 1, 1]
+        context = self.spatial_pool(x)
+
+        out = x
+        if self.channel_mul_conv is not None:
+            # [N, C, 1, 1]
+            channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
+            out = out * channel_mul_term
+        if self.channel_add_conv is not None:
+            # [N, C, 1, 1]
+            channel_add_term = self.channel_add_conv(context)
+            out = out + channel_add_term
+
+        return out
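One property worth noting (my own check, following from the zero init above): because last_zero_init zeroes the final conv of each fusion branch, a freshly constructed ContextBlock is an identity mapping, so it can be dropped into a pretrained network without perturbing it.

import torch
from mmcv.cnn import ContextBlock

block = ContextBlock(in_channels=64, ratio=1. / 4)
x = torch.randn(2, 64, 32, 32)
out = block(x)
print(out.shape)               # torch.Size([2, 64, 32, 32])
print(torch.allclose(out, x))  # True: the channel_add branch starts at zero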
external/cv/mmcv/cnn/bricks/conv.py
ADDED
@@ -0,0 +1,56 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import inspect
+from typing import Dict, Optional
+
+from mmengine.registry import MODELS
+from torch import nn
+
+MODELS.register_module('Conv1d', module=nn.Conv1d)
+MODELS.register_module('Conv2d', module=nn.Conv2d)
+MODELS.register_module('Conv3d', module=nn.Conv3d)
+MODELS.register_module('Conv', module=nn.Conv2d)
+
+
+def build_conv_layer(cfg: Optional[Dict], *args, **kwargs) -> nn.Module:
+    """Build convolution layer.
+
+    Args:
+        cfg (None or dict): The conv layer config, which should contain:
+            - type (str): Layer type.
+            - layer args: Args needed to instantiate a conv layer.
+        args (argument list): Arguments passed to the `__init__`
+            method of the corresponding conv layer.
+        kwargs (keyword arguments): Keyword arguments passed to the `__init__`
+            method of the corresponding conv layer.
+
+    Returns:
+        nn.Module: Created conv layer.
+    """
+    if cfg is None:
+        cfg_ = dict(type='Conv2d')
+    else:
+        if not isinstance(cfg, dict):
+            raise TypeError('cfg must be a dict')
+        if 'type' not in cfg:
+            raise KeyError('the cfg dict must contain the key "type"')
+        cfg_ = cfg.copy()
+
+    layer_type = cfg_.pop('type')
+    if inspect.isclass(layer_type):
+        return layer_type(*args, **kwargs, **cfg_)  # type: ignore
+    # Switch registry to the target scope. If `conv_layer` cannot be found
+    # in the registry, fallback to search `conv_layer` in the
+    # mmengine.MODELS.
+    with MODELS.switch_scope_and_registry(None) as registry:
+        conv_layer = registry.get(layer_type)
+    if conv_layer is None:
+        raise KeyError(f'Cannot find {layer_type} in registry under scope '
+                       f'name {registry.scope}')
+    layer = conv_layer(*args, **kwargs, **cfg_)
+
+    return layer
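Three equivalent ways to call the builder above (my own example): a config dict whose type string resolves through the registry, cfg=None (which falls back to Conv2d), and passing a class directly as the type.

from torch import nn
from mmcv.cnn import build_conv_layer

conv = build_conv_layer(dict(type='Conv2d'), 3, 16, kernel_size=3, padding=1)
default_conv = build_conv_layer(None, 3, 16, kernel_size=3)  # falls back to Conv2d
conv1d = build_conv_layer(dict(type=nn.Conv1d), 3, 16, kernel_size=3)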
external/cv/mmcv/cnn/bricks/conv2d_adaptive_padding.py
ADDED
@@ -0,0 +1,68 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import math
from typing import Tuple, Union

import torch
from mmengine.registry import MODELS
from torch import nn
from torch.nn import functional as F


@MODELS.register_module()
class Conv2dAdaptivePadding(nn.Conv2d):
    """Implementation of 2D convolution in tensorflow with `padding` as
    "same", which applies padding to input (if needed) so that input image
    gets fully covered by filter and stride you specified. For stride 1, this
    will ensure that output image size is same as input. For stride of 2,
    output dimensions will be half, for example.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. Default: ``True``
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, int]],
                 stride: Union[int, Tuple[int, int]] = 1,
                 padding: Union[int, Tuple[int, int]] = 0,
                 dilation: Union[int, Tuple[int, int]] = 1,
                 groups: int = 1,
                 bias: bool = True):
        super().__init__(in_channels, out_channels, kernel_size, stride, 0,
                         dilation, groups, bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        img_h, img_w = x.size()[-2:]
        kernel_h, kernel_w = self.weight.size()[-2:]
        stride_h, stride_w = self.stride
        output_h = math.ceil(img_h / stride_h)
        output_w = math.ceil(img_w / stride_w)
        pad_h = (
            max((output_h - 1) * self.stride[0] +
                (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0))
        pad_w = (
            max((output_w - 1) * self.stride[1] +
                (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0))
        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, [
                pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2
            ])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding,
                        self.dilation, self.groups)
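The padding arithmetic above yields an output of exactly ceil(H / stride) x ceil(W / stride) for any kernel size. A quick shape check, as a sketch (assuming the vendored `mmcv` is importable):

import torch
from mmcv.cnn.bricks.conv2d_adaptive_padding import Conv2dAdaptivePadding

# 31 is odd and the 5x5 kernel would normally shrink the map; adaptive
# padding keeps the TensorFlow 'same' shape contract instead.
conv = Conv2dAdaptivePadding(3, 8, kernel_size=5, stride=2)
out = conv(torch.randn(1, 3, 31, 31))
assert out.shape[-2:] == (16, 16)  # ceil(31 / 2) == 16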
external/cv/mmcv/cnn/bricks/conv_module.py
ADDED
@@ -0,0 +1,343 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import warnings
from functools import partial
from typing import Dict, Optional, Tuple, Union

import torch
import torch.nn as nn
from mmengine.model import constant_init, kaiming_init
from mmengine.registry import MODELS
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm, _InstanceNorm

from .activation import build_activation_layer
from .conv import build_conv_layer
from .norm import build_norm_layer
from .padding import build_padding_layer


def efficient_conv_bn_eval_forward(bn: _BatchNorm,
                                   conv: nn.modules.conv._ConvNd,
                                   x: torch.Tensor):
    """
    Implementation based on https://arxiv.org/abs/2305.11624
    "Tune-Mode ConvBN Blocks For Efficient Transfer Learning"
    It leverages the associative law between convolution and affine transform,
    i.e., normalize (weight conv feature) = (normalize weight) conv feature.
    It works for Eval mode of ConvBN blocks during validation, and can be used
    for training as well. It reduces memory and computation cost.

    Args:
        bn (_BatchNorm): a BatchNorm module.
        conv (nn._ConvNd): a conv module
        x (torch.Tensor): Input feature map.
    """
    # These lines of code are designed to deal with various cases
    # like bn without affine transform, and conv without bias
    weight_on_the_fly = conv.weight
    if conv.bias is not None:
        bias_on_the_fly = conv.bias
    else:
        bias_on_the_fly = torch.zeros_like(bn.running_var)

    if bn.weight is not None:
        bn_weight = bn.weight
    else:
        bn_weight = torch.ones_like(bn.running_var)

    if bn.bias is not None:
        bn_bias = bn.bias
    else:
        bn_bias = torch.zeros_like(bn.running_var)

    # shape of [C_out, 1, 1, 1] in Conv2d
    weight_coeff = torch.rsqrt(bn.running_var +
                               bn.eps).reshape([-1] + [1] *
                                               (len(conv.weight.shape) - 1))
    # shape of [C_out, 1, 1, 1] in Conv2d
    coeff_on_the_fly = bn_weight.view_as(weight_coeff) * weight_coeff

    # shape of [C_out, C_in, k, k] in Conv2d
    weight_on_the_fly = weight_on_the_fly * coeff_on_the_fly
    # shape of [C_out] in Conv2d
    bias_on_the_fly = bn_bias + coeff_on_the_fly.flatten() *\
        (bias_on_the_fly - bn.running_mean)

    return conv._conv_forward(x, weight_on_the_fly, bias_on_the_fly)


@MODELS.register_module()
class ConvModule(nn.Module):
    """A conv block that bundles conv/norm/activation layers.

    This block simplifies the usage of convolution layers, which are commonly
    used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
    It is based upon three build methods: `build_conv_layer()`,
    `build_norm_layer()` and `build_activation_layer()`.

    Besides, we add some additional features in this module.
    1. Automatically set `bias` of the conv layer.
    2. Spectral norm is supported.
    3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only
       supports zero and circular padding, and we add "reflect" padding mode.

    Args:
        in_channels (int): Number of channels in the input feature map.
            Same as that in ``nn._ConvNd``.
        out_channels (int): Number of channels produced by the convolution.
            Same as that in ``nn._ConvNd``.
        kernel_size (int | tuple[int]): Size of the convolving kernel.
            Same as that in ``nn._ConvNd``.
        stride (int | tuple[int]): Stride of the convolution.
            Same as that in ``nn._ConvNd``.
        padding (int | tuple[int]): Zero-padding added to both sides of
            the input. Same as that in ``nn._ConvNd``.
        dilation (int | tuple[int]): Spacing between kernel elements.
            Same as that in ``nn._ConvNd``.
        groups (int): Number of blocked connections from input channels to
            output channels. Same as that in ``nn._ConvNd``.
        bias (bool | str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
            False. Default: "auto".
        conv_cfg (dict): Config dict for convolution layer. Default: None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        inplace (bool): Whether to use inplace mode for activation.
            Default: True.
        with_spectral_norm (bool): Whether use spectral norm in conv module.
            Default: False.
        padding_mode (str): If the `padding_mode` has not been supported by
            current `Conv2d` in PyTorch, we will use our own padding layer
            instead. Currently, we support ['zeros', 'circular'] with official
            implementation and ['reflect'] with our own implementation.
            Default: 'zeros'.
        order (tuple[str]): The order of conv/norm/activation layers. It is a
            sequence of "conv", "norm" and "act". Common examples are
            ("conv", "norm", "act") and ("act", "conv", "norm").
            Default: ('conv', 'norm', 'act').
        efficient_conv_bn_eval (bool): Whether use efficient conv when the
            consecutive bn is in eval mode (either training or testing), as
            proposed in https://arxiv.org/abs/2305.11624 . Default: `False`.
    """

    _abbr_ = 'conv_block'

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, int]],
                 stride: Union[int, Tuple[int, int]] = 1,
                 padding: Union[int, Tuple[int, int]] = 0,
                 dilation: Union[int, Tuple[int, int]] = 1,
                 groups: int = 1,
                 bias: Union[bool, str] = 'auto',
                 conv_cfg: Optional[Dict] = None,
                 norm_cfg: Optional[Dict] = None,
                 act_cfg: Optional[Dict] = dict(type='ReLU'),
                 inplace: bool = True,
                 with_spectral_norm: bool = False,
                 padding_mode: str = 'zeros',
                 order: tuple = ('conv', 'norm', 'act'),
                 efficient_conv_bn_eval: bool = False):
        super().__init__()
        assert conv_cfg is None or isinstance(conv_cfg, dict)
        assert norm_cfg is None or isinstance(norm_cfg, dict)
        assert act_cfg is None or isinstance(act_cfg, dict)
        official_padding_mode = ['zeros', 'circular']
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.inplace = inplace
        self.with_spectral_norm = with_spectral_norm
        self.with_explicit_padding = padding_mode not in official_padding_mode
        self.order = order
        assert isinstance(self.order, tuple) and len(self.order) == 3
        assert set(order) == {'conv', 'norm', 'act'}

        self.with_norm = norm_cfg is not None
        self.with_activation = act_cfg is not None
        # if the conv layer is before a norm layer, bias is unnecessary.
        if bias == 'auto':
            bias = not self.with_norm
        self.with_bias = bias

        if self.with_explicit_padding:
            pad_cfg = dict(type=padding_mode)
            self.padding_layer = build_padding_layer(pad_cfg, padding)

        # reset padding to 0 for conv module
        conv_padding = 0 if self.with_explicit_padding else padding
        # build convolution layer
        self.conv = build_conv_layer(
            conv_cfg,
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        # export the attributes of self.conv to a higher level for convenience
        self.in_channels = self.conv.in_channels
        self.out_channels = self.conv.out_channels
        self.kernel_size = self.conv.kernel_size
        self.stride = self.conv.stride
        self.padding = padding
        self.dilation = self.conv.dilation
        self.transposed = self.conv.transposed
        self.output_padding = self.conv.output_padding
        self.groups = self.conv.groups

        if self.with_spectral_norm:
            self.conv = nn.utils.spectral_norm(self.conv)

        # build normalization layers
        if self.with_norm:
            # norm layer is after conv layer
            if order.index('norm') > order.index('conv'):
                norm_channels = out_channels
            else:
                norm_channels = in_channels
            self.norm_name, norm = build_norm_layer(
                norm_cfg, norm_channels)  # type: ignore
            self.add_module(self.norm_name, norm)
            if self.with_bias:
                if isinstance(norm, (_BatchNorm, _InstanceNorm)):
                    warnings.warn(
                        'Unnecessary conv bias before batch/instance norm')
        else:
            self.norm_name = None  # type: ignore

        self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)

        # build activation layer
        if self.with_activation:
            act_cfg_ = act_cfg.copy()  # type: ignore
            # nn.Tanh has no 'inplace' argument
            if act_cfg_['type'] not in [
                    'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish', 'GELU'
            ]:
                act_cfg_.setdefault('inplace', inplace)
            self.activate = build_activation_layer(act_cfg_)

        # Use msra init by default
        self.init_weights()

    @property
    def norm(self):
        if self.norm_name:
            return getattr(self, self.norm_name)
        else:
            return None

    def init_weights(self):
        # 1. It is mainly for customized conv layers with their own
        #    initialization manners by calling their own ``init_weights()``,
        #    and we do not want ConvModule to override the initialization.
        # 2. For customized conv layers without their own initialization
        #    manners (that is, they don't have their own ``init_weights()``)
        #    and PyTorch's conv layers, they will be initialized by
        #    this method with default ``kaiming_init``.
        # Note: For PyTorch's conv layers, they will be overwritten by our
        #    initialization implementation using default ``kaiming_init``.
        if not hasattr(self.conv, 'init_weights'):
            if self.with_activation and self.act_cfg['type'] == 'LeakyReLU':
                nonlinearity = 'leaky_relu'
                a = self.act_cfg.get('negative_slope', 0.01)
            else:
                nonlinearity = 'relu'
                a = 0
            kaiming_init(self.conv, a=a, nonlinearity=nonlinearity)
        if self.with_norm:
            constant_init(self.norm, 1, bias=0)

    def forward(self,
                x: torch.Tensor,
                activate: bool = True,
                norm: bool = True) -> torch.Tensor:
        layer_index = 0
        while layer_index < len(self.order):
            layer = self.order[layer_index]
            if layer == 'conv':
                if self.with_explicit_padding:
                    x = self.padding_layer(x)
                # if the next operation is norm and we have a norm layer in
                # eval mode and we have enabled `efficient_conv_bn_eval` for
                # the conv operator, then activate the optimized forward and
                # skip the next norm operator since it has been fused
                if layer_index + 1 < len(self.order) and \
                        self.order[layer_index + 1] == 'norm' and norm and \
                        self.with_norm and not self.norm.training and \
                        self.efficient_conv_bn_eval_forward is not None:
                    self.conv.forward = partial(
                        self.efficient_conv_bn_eval_forward, self.norm,
                        self.conv)
                    layer_index += 1
                    x = self.conv(x)
                    del self.conv.forward
                else:
                    x = self.conv(x)
            elif layer == 'norm' and norm and self.with_norm:
                x = self.norm(x)
            elif layer == 'act' and activate and self.with_activation:
                x = self.activate(x)
            layer_index += 1
        return x

    def turn_on_efficient_conv_bn_eval(self, efficient_conv_bn_eval=True):
        # efficient_conv_bn_eval works for conv + bn
        # with `track_running_stats` option
        if efficient_conv_bn_eval and self.norm \
                and isinstance(self.norm, _BatchNorm) \
                and self.norm.track_running_stats:
            self.efficient_conv_bn_eval_forward = efficient_conv_bn_eval_forward  # noqa: E501
        else:
            self.efficient_conv_bn_eval_forward = None  # type: ignore

    @staticmethod
    def create_from_conv_bn(conv: torch.nn.modules.conv._ConvNd,
                            bn: torch.nn.modules.batchnorm._BatchNorm,
                            efficient_conv_bn_eval=True) -> 'ConvModule':
        """Create a ConvModule from a conv and a bn module."""
        self = ConvModule.__new__(ConvModule)
        super(ConvModule, self).__init__()

        self.conv_cfg = None
        self.norm_cfg = None
        self.act_cfg = None
        self.inplace = False
        self.with_spectral_norm = False
        self.with_explicit_padding = False
        self.order = ('conv', 'norm', 'act')

        self.with_norm = True
        self.with_activation = False
        self.with_bias = conv.bias is not None

        # build convolution layer
        self.conv = conv
        # export the attributes of self.conv to a higher level for convenience
        self.in_channels = self.conv.in_channels
        self.out_channels = self.conv.out_channels
        self.kernel_size = self.conv.kernel_size
        self.stride = self.conv.stride
        self.padding = self.conv.padding
        self.dilation = self.conv.dilation
        self.transposed = self.conv.transposed
        self.output_padding = self.conv.output_padding
        self.groups = self.conv.groups

        # build normalization layers
        self.norm_name, norm = 'bn', bn
        self.add_module(self.norm_name, norm)

        self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)

        return self
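A usage sketch for ConvModule (assuming the vendored `mmcv` is importable): with a norm config, `bias='auto'` resolves to False and the forward pass applies the layers in the declared order.

import torch
from mmcv.cnn import ConvModule

block = ConvModule(3, 16, 3, padding=1,
                   norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'))
y = block(torch.randn(2, 3, 32, 32))  # conv -> BN -> ReLU
assert y.shape == (2, 16, 32, 32)
assert block.conv.bias is None  # bias='auto' dropped the redundant conv bias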
external/cv/mmcv/cnn/bricks/conv_ws.py
ADDED
@@ -0,0 +1,158 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from collections import OrderedDict
from typing import Dict, List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmengine.registry import MODELS


def conv_ws_2d(input: torch.Tensor,
               weight: torch.Tensor,
               bias: Optional[torch.Tensor] = None,
               stride: Union[int, Tuple[int, int]] = 1,
               padding: Union[int, Tuple[int, int]] = 0,
               dilation: Union[int, Tuple[int, int]] = 1,
               groups: int = 1,
               eps: float = 1e-5) -> torch.Tensor:
    c_in = weight.size(0)
    weight_flat = weight.view(c_in, -1)
    mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
    std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
    weight = (weight - mean) / (std + eps)
    return F.conv2d(input, weight, bias, stride, padding, dilation, groups)


@MODELS.register_module('ConvWS')
class ConvWS2d(nn.Conv2d):

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, int]],
                 stride: Union[int, Tuple[int, int]] = 1,
                 padding: Union[int, Tuple[int, int]] = 0,
                 dilation: Union[int, Tuple[int, int]] = 1,
                 groups: int = 1,
                 bias: bool = True,
                 eps: float = 1e-5):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,
                          self.dilation, self.groups, self.eps)


@MODELS.register_module(name='ConvAWS')
class ConvAWS2d(nn.Conv2d):
    """AWS (Adaptive Weight Standardization)

    This is a variant of Weight Standardization
    (https://arxiv.org/pdf/1903.10520.pdf)
    It is used in DetectoRS to avoid NaN
    (https://arxiv.org/pdf/2006.02334.pdf)

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the conv kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If set True, adds a learnable bias to the
            output. Default: True
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, int]],
                 stride: Union[int, Tuple[int, int]] = 1,
                 padding: Union[int, Tuple[int, int]] = 0,
                 dilation: Union[int, Tuple[int, int]] = 1,
                 groups: int = 1,
                 bias: bool = True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.register_buffer('weight_gamma',
                             torch.ones(self.out_channels, 1, 1, 1))
        self.register_buffer('weight_beta',
                             torch.zeros(self.out_channels, 1, 1, 1))

    def _get_weight(self, weight: torch.Tensor) -> torch.Tensor:
        weight_flat = weight.view(weight.size(0), -1)
        mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1)
        std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1)
        weight = (weight - mean) / std
        weight = self.weight_gamma * weight + self.weight_beta
        return weight

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        weight = self._get_weight(self.weight)
        return F.conv2d(x, weight, self.bias, self.stride, self.padding,
                        self.dilation, self.groups)

    def _load_from_state_dict(self, state_dict: OrderedDict, prefix: str,
                              local_metadata: Dict, strict: bool,
                              missing_keys: List[str],
                              unexpected_keys: List[str],
                              error_msgs: List[str]) -> None:
        """Override default load function.

        AWS overrides the function _load_from_state_dict to recover
        weight_gamma and weight_beta if they are missing. If weight_gamma and
        weight_beta are found in the checkpoint, this function will return
        after super()._load_from_state_dict. Otherwise, it will compute the
        mean and std of the pretrained weights and store them in weight_beta
        and weight_gamma.
        """

        self.weight_gamma.data.fill_(-1)
        local_missing_keys: List = []
        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, local_missing_keys,
                                      unexpected_keys, error_msgs)
        if self.weight_gamma.data.mean() > 0:
            for k in local_missing_keys:
                missing_keys.append(k)
            return
        weight = self.weight.data
        weight_flat = weight.view(weight.size(0), -1)
        mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1)
        std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1)
        self.weight_beta.data.copy_(mean)
        self.weight_gamma.data.copy_(std)
        missing_gamma_beta = [
            k for k in local_missing_keys
            if k.endswith('weight_gamma') or k.endswith('weight_beta')
        ]
        for k in missing_gamma_beta:
            local_missing_keys.remove(k)
        for k in local_missing_keys:
            missing_keys.append(k)
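Weight standardization rescales each output-channel filter to (W_i - mean_i) / (std_i + eps) before the convolution; only the effective kernel changes, so the call signature mirrors F.conv2d. A small sketch (assuming the vendored `mmcv` is importable):

import torch
from mmcv.cnn.bricks.conv_ws import conv_ws_2d

weight = torch.randn(8, 3, 3, 3)
x = torch.randn(1, 3, 16, 16)
y = conv_ws_2d(x, weight, padding=1)  # convolves with standardized weights
assert y.shape == (1, 8, 16, 16)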
external/cv/mmcv/cnn/bricks/depthwise_separable_conv_module.py
ADDED
@@ -0,0 +1,104 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict, Optional, Tuple, Union

import torch
import torch.nn as nn

from .conv_module import ConvModule


class DepthwiseSeparableConvModule(nn.Module):
    """Depthwise separable convolution module.

    See https://arxiv.org/pdf/1704.04861.pdf for details.

    This module can replace a ConvModule with the conv block replaced by two
    conv blocks: a depthwise conv block and a pointwise conv block. The
    depthwise conv block contains depthwise-conv/norm/activation layers. The
    pointwise conv block contains pointwise-conv/norm/activation layers. It
    should be noted that there will be norm/activation layer in the depthwise
    conv block if `norm_cfg` and `act_cfg` are specified.

    Args:
        in_channels (int): Number of channels in the input feature map.
            Same as that in ``nn._ConvNd``.
        out_channels (int): Number of channels produced by the convolution.
            Same as that in ``nn._ConvNd``.
        kernel_size (int | tuple[int]): Size of the convolving kernel.
            Same as that in ``nn._ConvNd``.
        stride (int | tuple[int]): Stride of the convolution.
            Same as that in ``nn._ConvNd``. Default: 1.
        padding (int | tuple[int]): Zero-padding added to both sides of
            the input. Same as that in ``nn._ConvNd``. Default: 0.
        dilation (int | tuple[int]): Spacing between kernel elements.
            Same as that in ``nn._ConvNd``. Default: 1.
        norm_cfg (dict): Default norm config for both depthwise ConvModule and
            pointwise ConvModule. Default: None.
        act_cfg (dict): Default activation config for both depthwise ConvModule
            and pointwise ConvModule. Default: dict(type='ReLU').
        dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is
            'default', it will be the same as `norm_cfg`. Default: 'default'.
        dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
            'default', it will be the same as `act_cfg`. Default: 'default'.
        pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is
            'default', it will be the same as `norm_cfg`. Default: 'default'.
        pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is
            'default', it will be the same as `act_cfg`. Default: 'default'.
        kwargs (optional): Other shared arguments for depthwise and pointwise
            ConvModule. See ConvModule for ref.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, int]],
                 stride: Union[int, Tuple[int, int]] = 1,
                 padding: Union[int, Tuple[int, int]] = 0,
                 dilation: Union[int, Tuple[int, int]] = 1,
                 norm_cfg: Optional[Dict] = None,
                 act_cfg: Dict = dict(type='ReLU'),
                 dw_norm_cfg: Union[Dict, str] = 'default',
                 dw_act_cfg: Union[Dict, str] = 'default',
                 pw_norm_cfg: Union[Dict, str] = 'default',
                 pw_act_cfg: Union[Dict, str] = 'default',
                 **kwargs):
        super().__init__()
        assert 'groups' not in kwargs, 'groups should not be specified'

        # if norm/activation config of depthwise/pointwise ConvModule is not
        # specified, use default config.
        dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg  # type: ignore # noqa E501
        dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg
        pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg  # type: ignore # noqa E501
        pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg

        # depthwise convolution
        self.depthwise_conv = ConvModule(
            in_channels,
            in_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            norm_cfg=dw_norm_cfg,  # type: ignore
            act_cfg=dw_act_cfg,  # type: ignore
            **kwargs)

        self.pointwise_conv = ConvModule(
            in_channels,
            out_channels,
            1,
            norm_cfg=pw_norm_cfg,  # type: ignore
            act_cfg=pw_act_cfg,  # type: ignore
            **kwargs)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.depthwise_conv(x)
        x = self.pointwise_conv(x)
        return x
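The factorization is where the savings come from: a k x k depthwise conv costs C_in * k^2 weights and the 1x1 pointwise conv costs C_in * C_out, versus C_in * C_out * k^2 for a full convolution. A comparison sketch (assuming the vendored `mmcv` is importable):

import torch.nn as nn
from mmcv.cnn import DepthwiseSeparableConvModule

sep = DepthwiseSeparableConvModule(64, 128, 3, padding=1)
full = nn.Conv2d(64, 128, 3, padding=1)
n_sep = sum(p.numel() for p in sep.parameters())
n_full = sum(p.numel() for p in full.parameters())
print(n_sep, n_full)  # 8960 vs 73856 with these shapes, roughly 8x fewer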
external/cv/mmcv/cnn/bricks/drop.py
ADDED
@@ -0,0 +1,72 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from typing import Any, Dict, Optional

import torch
import torch.nn as nn
from mmengine.registry import MODELS


def drop_path(x: torch.Tensor,
              drop_prob: float = 0.,
              training: bool = False) -> torch.Tensor:
    """Drop paths (Stochastic Depth) per sample (when applied in main path of
    residual blocks).

    We follow the implementation
    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501
    """
    if not training:
        return x
    keep_prob = 1 - drop_prob
    # handle tensors with different dimensions, not just 4D tensors.
    shape = (x.shape[0], ) + (1, ) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(
        shape, dtype=x.dtype, device=x.device)
    output = x.div(keep_prob) * random_tensor.floor()
    return output


@MODELS.register_module()
class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of
    residual blocks).

    We follow the implementation
    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501

    Args:
        drop_prob (float): Probability of the path to be zeroed. Default: 0.1
    """

    def __init__(self, drop_prob: float = 0.1):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return drop_path(x, self.drop_prob, self.training)


@MODELS.register_module()
class Dropout(nn.Dropout):
    """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of
    ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with
    ``DropPath``

    Args:
        drop_prob (float): Probability of the elements to be
            zeroed. Default: 0.5.
        inplace (bool): Do the operation inplace or not. Default: False.
    """

    def __init__(self, drop_prob: float = 0.5, inplace: bool = False):
        super().__init__(p=drop_prob, inplace=inplace)


def build_dropout(cfg: Dict, default_args: Optional[Dict] = None) -> Any:
    """Builder for drop out layers."""
    return MODELS.build(cfg, default_args=default_args)
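drop_path zeroes whole samples with probability drop_prob and scales the survivors by 1 / (1 - drop_prob), so the expected activation is unchanged. A small sketch (assuming the vendored `mmcv` is importable):

import torch
from mmcv.cnn.bricks.drop import DropPath

dp = DropPath(drop_prob=0.2).train()  # drop_path is a no-op in eval mode
y = dp(torch.ones(10000, 8))
# each sample row is either all zeros or all 1 / 0.8 = 1.25
print(sorted(y[:, 0].unique().tolist()))  # [0.0, 1.25]
print(y.mean().item())                    # close to 1.0 in expectation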
external/cv/mmcv/cnn/bricks/generalized_attention.py
ADDED
@@ -0,0 +1,416 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmengine.model import kaiming_init
from mmengine.registry import MODELS


@MODELS.register_module()
class GeneralizedAttention(nn.Module):
    """GeneralizedAttention module.

    See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'
    (https://arxiv.org/abs/1904.05873) for details.

    Args:
        in_channels (int): Channels of the input feature map.
        spatial_range (int): The spatial range. -1 indicates no spatial range
            constraint. Default: -1.
        num_heads (int): The head number of empirical_attention module.
            Default: 9.
        position_embedding_dim (int): The position embedding dimension.
            Default: -1.
        position_magnitude (int): A multiplier acting on coord difference.
            Default: 1.
        kv_stride (int): The feature stride acting on key/value feature map.
            Default: 2.
        q_stride (int): The feature stride acting on query feature map.
            Default: 1.
        attention_type (str): A binary indicator string for indicating which
            items in generalized empirical_attention module are used.
            Default: '1111'.

            - '1000' indicates 'query and key content' (appr - appr) item,
            - '0100' indicates 'query content and relative position'
              (appr - position) item,
            - '0010' indicates 'key content only' (bias - appr) item,
            - '0001' indicates 'relative position only' (bias - position) item.
    """

    _abbr_ = 'gen_attention_block'

    def __init__(self,
                 in_channels: int,
                 spatial_range: int = -1,
                 num_heads: int = 9,
                 position_embedding_dim: int = -1,
                 position_magnitude: int = 1,
                 kv_stride: int = 2,
                 q_stride: int = 1,
                 attention_type: str = '1111'):

        super().__init__()

        # hard range means local range for non-local operation
        self.position_embedding_dim = (
            position_embedding_dim
            if position_embedding_dim > 0 else in_channels)

        self.position_magnitude = position_magnitude
        self.num_heads = num_heads
        self.in_channels = in_channels
        self.spatial_range = spatial_range
        self.kv_stride = kv_stride
        self.q_stride = q_stride
        self.attention_type = [bool(int(_)) for _ in attention_type]
        self.qk_embed_dim = in_channels // num_heads
        out_c = self.qk_embed_dim * num_heads

        if self.attention_type[0] or self.attention_type[1]:
            self.query_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_c,
                kernel_size=1,
                bias=False)
            self.query_conv.kaiming_init = True

        if self.attention_type[0] or self.attention_type[2]:
            self.key_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_c,
                kernel_size=1,
                bias=False)
            self.key_conv.kaiming_init = True

        self.v_dim = in_channels // num_heads
        self.value_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=self.v_dim * num_heads,
            kernel_size=1,
            bias=False)
        self.value_conv.kaiming_init = True

        if self.attention_type[1] or self.attention_type[3]:
            self.appr_geom_fc_x = nn.Linear(
                self.position_embedding_dim // 2, out_c, bias=False)
            self.appr_geom_fc_x.kaiming_init = True

            self.appr_geom_fc_y = nn.Linear(
                self.position_embedding_dim // 2, out_c, bias=False)
            self.appr_geom_fc_y.kaiming_init = True

        if self.attention_type[2]:
            stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
            appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv
            self.appr_bias = nn.Parameter(appr_bias_value)

        if self.attention_type[3]:
            stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
            geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv
            self.geom_bias = nn.Parameter(geom_bias_value)

        self.proj_conv = nn.Conv2d(
            in_channels=self.v_dim * num_heads,
            out_channels=in_channels,
            kernel_size=1,
            bias=True)
        self.proj_conv.kaiming_init = True
        self.gamma = nn.Parameter(torch.zeros(1))

        if self.spatial_range >= 0:
            # only works when non local is after 3*3 conv
            if in_channels == 256:
                max_len = 84
            elif in_channels == 512:
                max_len = 42

            max_len_kv = int((max_len - 1.0) / self.kv_stride + 1)
            local_constraint_map = np.ones(
                (max_len, max_len, max_len_kv, max_len_kv), dtype=int)
            for iy in range(max_len):
                for ix in range(max_len):
                    local_constraint_map[
                        iy, ix,
                        max((iy - self.spatial_range) //
                            self.kv_stride, 0):min((iy + self.spatial_range +
                                                    1) // self.kv_stride +
                                                   1, max_len),
                        max((ix - self.spatial_range) //
                            self.kv_stride, 0):min((ix + self.spatial_range +
                                                    1) // self.kv_stride +
                                                   1, max_len)] = 0

            self.local_constraint_map = nn.Parameter(
                torch.from_numpy(local_constraint_map).byte(),
                requires_grad=False)

        if self.q_stride > 1:
            self.q_downsample = nn.AvgPool2d(
                kernel_size=1, stride=self.q_stride)
        else:
            self.q_downsample = None

        if self.kv_stride > 1:
            self.kv_downsample = nn.AvgPool2d(
                kernel_size=1, stride=self.kv_stride)
        else:
            self.kv_downsample = None

        self.init_weights()

    def get_position_embedding(self,
                               h,
                               w,
                               h_kv,
                               w_kv,
                               q_stride,
                               kv_stride,
                               device,
                               dtype,
                               feat_dim,
                               wave_length=1000):
        # the default type of Tensor is float32, leading to type mismatch
        # in fp16 mode. Cast it to support fp16 mode.
        h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype)
        h_idxs = h_idxs.view((h, 1)) * q_stride

        w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype)
        w_idxs = w_idxs.view((w, 1)) * q_stride

        h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(
            device=device, dtype=dtype)
        h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride

        w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(
            device=device, dtype=dtype)
        w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride

        # (h, h_kv, 1)
        h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0)
        h_diff *= self.position_magnitude

        # (w, w_kv, 1)
        w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0)
        w_diff *= self.position_magnitude

        feat_range = torch.arange(0, feat_dim / 4).to(
            device=device, dtype=dtype)

        dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype)
        dim_mat = dim_mat**((4. / feat_dim) * feat_range)
        dim_mat = dim_mat.view((1, 1, -1))

        embedding_x = torch.cat(
            ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2)

        embedding_y = torch.cat(
            ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2)

        return embedding_x, embedding_y

    def forward(self, x_input: torch.Tensor) -> torch.Tensor:
        num_heads = self.num_heads

        # use empirical_attention
        if self.q_downsample is not None:
            x_q = self.q_downsample(x_input)
        else:
            x_q = x_input
        n, _, h, w = x_q.shape

        if self.kv_downsample is not None:
            x_kv = self.kv_downsample(x_input)
        else:
            x_kv = x_input
        _, _, h_kv, w_kv = x_kv.shape

        if self.attention_type[0] or self.attention_type[1]:
            proj_query = self.query_conv(x_q).view(
                (n, num_heads, self.qk_embed_dim, h * w))
            proj_query = proj_query.permute(0, 1, 3, 2)

        if self.attention_type[0] or self.attention_type[2]:
            proj_key = self.key_conv(x_kv).view(
                (n, num_heads, self.qk_embed_dim, h_kv * w_kv))

        if self.attention_type[1] or self.attention_type[3]:
            position_embed_x, position_embed_y = self.get_position_embedding(
                h, w, h_kv, w_kv, self.q_stride, self.kv_stride,
                x_input.device, x_input.dtype, self.position_embedding_dim)
            # (n, num_heads, w, w_kv, dim)
            position_feat_x = self.appr_geom_fc_x(position_embed_x).\
                view(1, w, w_kv, num_heads, self.qk_embed_dim).\
                permute(0, 3, 1, 2, 4).\
                repeat(n, 1, 1, 1, 1)

            # (n, num_heads, h, h_kv, dim)
            position_feat_y = self.appr_geom_fc_y(position_embed_y).\
                view(1, h, h_kv, num_heads, self.qk_embed_dim).\
                permute(0, 3, 1, 2, 4).\
                repeat(n, 1, 1, 1, 1)

            position_feat_x /= math.sqrt(2)
            position_feat_y /= math.sqrt(2)

        # accelerate for saliency only
        if (np.sum(self.attention_type) == 1) and self.attention_type[2]:
            appr_bias = self.appr_bias.\
                view(1, num_heads, 1, self.qk_embed_dim).\
                repeat(n, 1, 1, 1)

            energy = torch.matmul(appr_bias, proj_key).\
                view(n, num_heads, 1, h_kv * w_kv)

            h = 1
            w = 1
        else:
            # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for
            if not self.attention_type[0]:
                energy = torch.zeros(
                    n,
                    num_heads,
                    h,
                    w,
                    h_kv,
                    w_kv,
                    dtype=x_input.dtype,
                    device=x_input.device)

            # attention_type[0]: appr - appr
            # attention_type[1]: appr - position
            # attention_type[2]: bias - appr
            # attention_type[3]: bias - position
            if self.attention_type[0] or self.attention_type[2]:
                if self.attention_type[0] and self.attention_type[2]:
                    appr_bias = self.appr_bias.\
                        view(1, num_heads, 1, self.qk_embed_dim)
                    energy = torch.matmul(proj_query + appr_bias, proj_key).\
                        view(n, num_heads, h, w, h_kv, w_kv)

                elif self.attention_type[0]:
                    energy = torch.matmul(proj_query, proj_key).\
                        view(n, num_heads, h, w, h_kv, w_kv)

                elif self.attention_type[2]:
                    appr_bias = self.appr_bias.\
                        view(1, num_heads, 1, self.qk_embed_dim).\
                        repeat(n, 1, 1, 1)

                    energy += torch.matmul(appr_bias, proj_key).\
                        view(n, num_heads, 1, 1, h_kv, w_kv)

            if self.attention_type[1] or self.attention_type[3]:
                if self.attention_type[1] and self.attention_type[3]:
                    geom_bias = self.geom_bias.\
                        view(1, num_heads, 1, self.qk_embed_dim)

                    proj_query_reshape = (proj_query + geom_bias).\
                        view(n, num_heads, h, w, self.qk_embed_dim)

                    energy_x = torch.matmul(
                        proj_query_reshape.permute(0, 1, 3, 2, 4),
                        position_feat_x.permute(0, 1, 2, 4, 3))
                    energy_x = energy_x.\
                        permute(0, 1, 3, 2, 4).unsqueeze(4)

                    energy_y = torch.matmul(
                        proj_query_reshape,
                        position_feat_y.permute(0, 1, 2, 4, 3))
                    energy_y = energy_y.unsqueeze(5)

                    energy += energy_x + energy_y

                elif self.attention_type[1]:
                    proj_query_reshape = proj_query.\
                        view(n, num_heads, h, w, self.qk_embed_dim)
                    proj_query_reshape = proj_query_reshape.\
                        permute(0, 1, 3, 2, 4)
                    position_feat_x_reshape = position_feat_x.\
                        permute(0, 1, 2, 4, 3)
                    position_feat_y_reshape = position_feat_y.\
                        permute(0, 1, 2, 4, 3)

                    energy_x = torch.matmul(proj_query_reshape,
                                            position_feat_x_reshape)
                    energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4)

                    energy_y = torch.matmul(proj_query_reshape,
                                            position_feat_y_reshape)
                    energy_y = energy_y.unsqueeze(5)

                    energy += energy_x + energy_y

                elif self.attention_type[3]:
                    geom_bias = self.geom_bias.\
                        view(1, num_heads, self.qk_embed_dim, 1).\
                        repeat(n, 1, 1, 1)

                    position_feat_x_reshape = position_feat_x.\
                        view(n, num_heads, w * w_kv, self.qk_embed_dim)

                    position_feat_y_reshape = position_feat_y.\
                        view(n, num_heads, h * h_kv, self.qk_embed_dim)

                    energy_x = torch.matmul(position_feat_x_reshape, geom_bias)
                    energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv)

                    energy_y = torch.matmul(position_feat_y_reshape, geom_bias)
                    energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1)

                    energy += energy_x + energy_y

            energy = energy.view(n, num_heads, h * w, h_kv * w_kv)

        if self.spatial_range >= 0:
            cur_local_constraint_map = \
                self.local_constraint_map[:h, :w, :h_kv, :w_kv].\
                contiguous().\
                view(1, 1, h*w, h_kv*w_kv)

            energy = energy.masked_fill_(cur_local_constraint_map,
                                         float('-inf'))

        attention = F.softmax(energy, 3)

        proj_value = self.value_conv(x_kv)
        proj_value_reshape = proj_value.\
            view((n, num_heads, self.v_dim, h_kv * w_kv)).\
            permute(0, 1, 3, 2)

        out = torch.matmul(attention, proj_value_reshape).\
            permute(0, 1, 3, 2).\
            contiguous().\
            view(n, self.v_dim * self.num_heads, h, w)

        out = self.proj_conv(out)

        # output is downsampled, upsample back to input size
        if self.q_downsample is not None:
            out = F.interpolate(
                out,
                size=x_input.shape[2:],
                mode='bilinear',
                align_corners=False)

        out = self.gamma * out + x_input
        return out

    def init_weights(self):
        for m in self.modules():
            if hasattr(m, 'kaiming_init') and m.kaiming_init:
                kaiming_init(
                    m,
                    mode='fan_in',
                    nonlinearity='leaky_relu',
                    bias=0,
                    distribution='uniform',
                    a=1)
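The `attention_type` bit string toggles the four energy terms independently, and because `gamma` starts at zero the block is an identity-plus-residual at initialization. A usage sketch (assuming the vendored `mmcv` is importable); '0010' exercises the key-content-only fast path with the default kv_stride=2 downsampling:

import torch
from mmcv.cnn.bricks.generalized_attention import GeneralizedAttention

attn = GeneralizedAttention(256, num_heads=8, attention_type='0010')
x = torch.randn(1, 256, 20, 20)
out = attn(x)
assert out.shape == x.shape
assert torch.allclose(out, x)  # gamma == 0 at init, so output == input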
external/cv/mmcv/cnn/bricks/hsigmoid.py
ADDED
@@ -0,0 +1,55 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import warnings

import torch
import torch.nn as nn
from mmengine.registry import MODELS


@MODELS.register_module()
class HSigmoid(nn.Module):
    """Hard Sigmoid Module. Apply the hard sigmoid function:
    Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value)
    Default: Hsigmoid(x) = min(max((x + 3) / 6, 0), 1)

    Note:
        In MMCV v1.4.4, we modified the default value of args to align with
        PyTorch official.

    Args:
        bias (float): Bias of the input feature map. Default: 3.0.
        divisor (float): Divisor of the input feature map. Default: 6.0.
        min_value (float): Lower bound value. Default: 0.0.
        max_value (float): Upper bound value. Default: 1.0.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self,
                 bias: float = 3.0,
                 divisor: float = 6.0,
                 min_value: float = 0.0,
                 max_value: float = 1.0):
        super().__init__()
        warnings.warn(
            'In MMCV v1.4.4, we modified the default value of args to align '
            'with PyTorch official. Previous Implementation: '
            'Hsigmoid(x) = min(max((x + 1) / 2, 0), 1). '
            'Current Implementation: '
            'Hsigmoid(x) = min(max((x + 3) / 6, 0), 1).')
        self.bias = bias
        self.divisor = divisor
        assert self.divisor != 0
        self.min_value = min_value
        self.max_value = max_value

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = (x + self.bias) / self.divisor

        return x.clamp_(self.min_value, self.max_value)
|
external/cv/mmcv/cnn/bricks/hswish.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
import torch.nn as nn
from mmengine.registry import MODELS
from mmengine.utils import digit_version
from mmengine.utils.dl_utils import TORCH_VERSION


class HSwish(nn.Module):
    """Hard Swish Module.

    This module applies the hard swish function:

    .. math::
        Hswish(x) = x * ReLU6(x + 3) / 6

    Args:
        inplace (bool): can optionally do the operation in-place.
            Default: False.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.act = nn.ReLU6(inplace)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.act(x + 3) / 6


if (TORCH_VERSION == 'parrots'
        or digit_version(TORCH_VERSION) < digit_version('1.7')):
    # Hardswish is not supported when PyTorch version < 1.6.
    # And Hardswish in PyTorch 1.6 does not support inplace.
    MODELS.register_module(module=HSwish)
else:
    MODELS.register_module(module=nn.Hardswish, name='HSwish')
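On PyTorch >= 1.7 the registered 'HSwish' entry is therefore just `nn.Hardswish`; the fallback class computes the same function, as a quick check shows (a sketch, assuming the vendored `mmcv` is importable):

import torch
import torch.nn as nn
from mmcv.cnn.bricks.hswish import HSwish

x = torch.linspace(-5, 5, steps=11)
assert torch.allclose(HSwish()(x), nn.Hardswish()(x))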
external/cv/mmcv/cnn/bricks/non_local.py
ADDED
@@ -0,0 +1,313 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from abc import ABCMeta
from typing import Dict, Optional

import torch
import torch.nn as nn
from mmengine.model import constant_init, normal_init
from mmengine.registry import MODELS

from .conv_module import ConvModule


class _NonLocalNd(nn.Module, metaclass=ABCMeta):
    """Basic Non-local module.

    This module is proposed in
    "Non-local Neural Networks"
    Paper reference: https://arxiv.org/abs/1711.07971
    Code reference: https://github.com/AlexHex7/Non-local_pytorch

    Args:
        in_channels (int): Channels of the input feature map.
        reduction (int): Channel reduction ratio. Default: 2.
        use_scale (bool): Whether to scale pairwise_weight by
            `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`.
            Default: True.
        conv_cfg (None | dict): The config dict for convolution layers.
            If not specified, it will use `nn.Conv2d` for convolution layers.
            Default: None.
        norm_cfg (None | dict): The config dict for normalization layers.
            Default: None. (This parameter is only applicable to conv_out.)
        mode (str): Options are `gaussian`, `concatenation`,
            `embedded_gaussian` and `dot_product`. Default: embedded_gaussian.
    """

    def __init__(self,
                 in_channels: int,
                 reduction: int = 2,
                 use_scale: bool = True,
                 conv_cfg: Optional[Dict] = None,
                 norm_cfg: Optional[Dict] = None,
                 mode: str = 'embedded_gaussian',
                 **kwargs):
        super().__init__()
        self.in_channels = in_channels
        self.reduction = reduction
        self.use_scale = use_scale
        self.inter_channels = max(in_channels // reduction, 1)
        self.mode = mode

        if mode not in [
                'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation'
        ]:
            raise ValueError("Mode should be in 'gaussian', 'concatenation', "
                             f"'embedded_gaussian' or 'dot_product', but got "
                             f'{mode} instead.')

        # g, theta, phi are defaulted as `nn.ConvNd`.
        # Here we use ConvModule for potential usage.
        self.g = ConvModule(
            self.in_channels,
            self.inter_channels,
            kernel_size=1,
            conv_cfg=conv_cfg,
            act_cfg=None)  # type: ignore
        self.conv_out = ConvModule(
            self.inter_channels,
            self.in_channels,
            kernel_size=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        if self.mode != 'gaussian':
            self.theta = ConvModule(
                self.in_channels,
                self.inter_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                act_cfg=None)
            self.phi = ConvModule(
                self.in_channels,
                self.inter_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                act_cfg=None)

        if self.mode == 'concatenation':
            self.concat_project = ConvModule(
                self.inter_channels * 2,
                1,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False,
                act_cfg=dict(type='ReLU'))

        self.init_weights(**kwargs)

    def init_weights(self, std: float = 0.01, zeros_init: bool = True) -> None:
        if self.mode != 'gaussian':
            for m in [self.g, self.theta, self.phi]:
                normal_init(m.conv, std=std)
        else:
            normal_init(self.g.conv, std=std)
        if zeros_init:
            if self.conv_out.norm_cfg is None:
                constant_init(self.conv_out.conv, 0)
            else:
                constant_init(self.conv_out.norm, 0)
        else:
            if self.conv_out.norm_cfg is None:
                normal_init(self.conv_out.conv, std=std)
            else:
                normal_init(self.conv_out.norm, std=std)

    def gaussian(self, theta_x: torch.Tensor,
                 phi_x: torch.Tensor) -> torch.Tensor:
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        pairwise_weight = pairwise_weight.softmax(dim=-1)
        return pairwise_weight

    def embedded_gaussian(self, theta_x: torch.Tensor,
                          phi_x: torch.Tensor) -> torch.Tensor:
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        if self.use_scale:
            # theta_x.shape[-1] is `self.inter_channels`
            pairwise_weight /= theta_x.shape[-1]**0.5
        pairwise_weight = pairwise_weight.softmax(dim=-1)
        return pairwise_weight

    def dot_product(self, theta_x: torch.Tensor,
                    phi_x: torch.Tensor) -> torch.Tensor:
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        pairwise_weight /= pairwise_weight.shape[-1]
        return pairwise_weight

    def concatenation(self, theta_x: torch.Tensor,
                      phi_x: torch.Tensor) -> torch.Tensor:
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        h = theta_x.size(2)
        w = phi_x.size(3)
        theta_x = theta_x.repeat(1, 1, 1, w)
        phi_x = phi_x.repeat(1, 1, h, 1)

        concat_feature = torch.cat([theta_x, phi_x], dim=1)
        pairwise_weight = self.concat_project(concat_feature)
        n, _, h, w = pairwise_weight.size()
        pairwise_weight = pairwise_weight.view(n, h, w)
        pairwise_weight /= pairwise_weight.shape[-1]

        return pairwise_weight

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Assume `reduction = 1`, then `inter_channels = C`
        # or `inter_channels = C` when `mode="gaussian"`

        # NonLocal1d x: [N, C, H]
        # NonLocal2d x: [N, C, H, W]
        # NonLocal3d x: [N, C, T, H, W]
        n = x.size(0)

        # NonLocal1d g_x: [N, H, C]
        # NonLocal2d g_x: [N, HxW, C]
        # NonLocal3d g_x: [N, TxHxW, C]
        g_x = self.g(x).view(n, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H]
        # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW]
        # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW]
        if self.mode == 'gaussian':
            theta_x = x.view(n, self.in_channels, -1)
            theta_x = theta_x.permute(0, 2, 1)
            if self.sub_sample:
                phi_x = self.phi(x).view(n, self.in_channels, -1)
            else:
                phi_x = x.view(n, self.in_channels, -1)
        elif self.mode == 'concatenation':
            theta_x = self.theta(x).view(n, self.inter_channels, -1, 1)
            phi_x = self.phi(x).view(n, self.inter_channels, 1, -1)
        else:
            theta_x = self.theta(x).view(n, self.inter_channels, -1)
            theta_x = theta_x.permute(0, 2, 1)
            phi_x = self.phi(x).view(n, self.inter_channels, -1)

        pairwise_func = getattr(self, self.mode)
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = pairwise_func(theta_x, phi_x)

        # NonLocal1d y: [N, H, C]
        # NonLocal2d y: [N, HxW, C]
        # NonLocal3d y: [N, TxHxW, C]
        y = torch.matmul(pairwise_weight, g_x)
        # NonLocal1d y: [N, C, H]
        # NonLocal2d y: [N, C, H, W]
        # NonLocal3d y: [N, C, T, H, W]
        y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels,
                                                    *x.size()[2:])

        output = x + self.conv_out(y)

        return output


class NonLocal1d(_NonLocalNd):
    """1D Non-local module.

    Args:
        in_channels (int): Same as `NonLocalND`.
        sub_sample (bool): Whether to apply max pooling after pairwise
            function (Note that the `sub_sample` is applied on spatial only).
            Default: False.
        conv_cfg (None | dict): Same as `NonLocalND`.
            Default: dict(type='Conv1d').
    """

    def __init__(self,
                 in_channels: int,
                 sub_sample: bool = False,
                 conv_cfg: Dict = dict(type='Conv1d'),
                 **kwargs):
        super().__init__(in_channels, conv_cfg=conv_cfg, **kwargs)

        self.sub_sample = sub_sample

        if sub_sample:
            max_pool_layer = nn.MaxPool1d(kernel_size=2)
            self.g = nn.Sequential(self.g, max_pool_layer)
            if self.mode != 'gaussian':
                self.phi = nn.Sequential(self.phi, max_pool_layer)
            else:
                self.phi = max_pool_layer


@MODELS.register_module()
class NonLocal2d(_NonLocalNd):
    """2D Non-local module.

    Args:
        in_channels (int): Same as `NonLocalND`.
        sub_sample (bool): Whether to apply max pooling after pairwise
            function (Note that the `sub_sample` is applied on spatial only).
            Default: False.
        conv_cfg (None | dict): Same as `NonLocalND`.
            Default: dict(type='Conv2d').
    """

    _abbr_ = 'nonlocal_block'

    def __init__(self,
                 in_channels: int,
                 sub_sample: bool = False,
                 conv_cfg: Dict = dict(type='Conv2d'),
                 **kwargs):
        super().__init__(in_channels, conv_cfg=conv_cfg, **kwargs)

        self.sub_sample = sub_sample

        if sub_sample:
            max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
            self.g = nn.Sequential(self.g, max_pool_layer)
            if self.mode != 'gaussian':
                self.phi = nn.Sequential(self.phi, max_pool_layer)
            else:
                self.phi = max_pool_layer


class NonLocal3d(_NonLocalNd):
    """3D Non-local module.

    Args:
        in_channels (int): Same as `NonLocalND`.
        sub_sample (bool): Whether to apply max pooling after pairwise
            function (Note that the `sub_sample` is applied on spatial only).
            Default: False.
        conv_cfg (None | dict): Same as `NonLocalND`.
            Default: dict(type='Conv3d').
    """

    def __init__(self,
                 in_channels: int,
                 sub_sample: bool = False,
                 conv_cfg: Dict = dict(type='Conv3d'),
                 **kwargs):
        super().__init__(in_channels, conv_cfg=conv_cfg, **kwargs)
        self.sub_sample = sub_sample

        if sub_sample:
            max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
            self.g = nn.Sequential(self.g, max_pool_layer)
            if self.mode != 'gaussian':
                self.phi = nn.Sequential(self.phi, max_pool_layer)
            else:
                self.phi = max_pool_layer
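A minimal shape-preservation sketch (illustrative, assuming this vendored mmcv exposes NonLocal2d from mmcv.cnn as upstream mmcv does): because `forward` ends with `output = x + self.conv_out(y)`, the block is residual and its output shape always matches the input.

# Hedged sketch: import path assumed from this package layout.
import torch
from mmcv.cnn import NonLocal2d

block = NonLocal2d(in_channels=16, reduction=2, mode='embedded_gaussian')
x = torch.randn(2, 16, 20, 20)
out = block(x)
assert out.shape == x.shape  # residual design keeps [N, C, H, W]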
external/cv/mmcv/cnn/bricks/norm.py
ADDED
@@ -0,0 +1,161 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import inspect
from typing import Dict, Tuple, Union

import torch.nn as nn
from mmengine.registry import MODELS
from mmengine.utils import is_tuple_of
from mmengine.utils.dl_utils.parrots_wrapper import (SyncBatchNorm, _BatchNorm,
                                                     _InstanceNorm)

MODELS.register_module('BN', module=nn.BatchNorm2d)
MODELS.register_module('BN1d', module=nn.BatchNorm1d)
MODELS.register_module('BN2d', module=nn.BatchNorm2d)
MODELS.register_module('BN3d', module=nn.BatchNorm3d)
MODELS.register_module('SyncBN', module=SyncBatchNorm)
MODELS.register_module('GN', module=nn.GroupNorm)
MODELS.register_module('LN', module=nn.LayerNorm)
MODELS.register_module('IN', module=nn.InstanceNorm2d)
MODELS.register_module('IN1d', module=nn.InstanceNorm1d)
MODELS.register_module('IN2d', module=nn.InstanceNorm2d)
MODELS.register_module('IN3d', module=nn.InstanceNorm3d)


def infer_abbr(class_type):
    """Infer abbreviation from the class name.

    When we build a norm layer with `build_norm_layer()`, we want to preserve
    the norm type in variable names, e.g., self.bn1, self.gn. This method will
    infer the abbreviation to map class types to abbreviations.

    Rule 1: If the class has the property "_abbr_", return the property.
    Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or
    InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and
    "in" respectively.
    Rule 3: If the class name contains "batch", "group", "layer" or "instance",
    the abbreviation of this layer will be "bn", "gn", "ln" and "in"
    respectively.
    Rule 4: Otherwise, the abbreviation falls back to "norm_layer".

    Args:
        class_type (type): The norm layer type.

    Returns:
        str: The inferred abbreviation.
    """
    if not inspect.isclass(class_type):
        raise TypeError(
            f'class_type must be a type, but got {type(class_type)}')
    if hasattr(class_type, '_abbr_'):
        return class_type._abbr_
    if issubclass(class_type, _InstanceNorm):  # IN is a subclass of BN
        return 'in'
    elif issubclass(class_type, _BatchNorm):
        return 'bn'
    elif issubclass(class_type, nn.GroupNorm):
        return 'gn'
    elif issubclass(class_type, nn.LayerNorm):
        return 'ln'
    else:
        class_name = class_type.__name__.lower()
        if 'batch' in class_name:
            return 'bn'
        elif 'group' in class_name:
            return 'gn'
        elif 'layer' in class_name:
            return 'ln'
        elif 'instance' in class_name:
            return 'in'
        else:
            return 'norm_layer'


def build_norm_layer(cfg: Dict,
                     num_features: int,
                     postfix: Union[int, str] = '') -> Tuple[str, nn.Module]:
    """Build normalization layer.

    Args:
        cfg (dict): The norm layer config, which should contain:

            - type (str): Layer type.
            - layer args: Args needed to instantiate a norm layer.
            - requires_grad (bool, optional): Whether the layer parameters
              require gradient updates. Default: True.
        num_features (int): Number of input channels.
        postfix (int | str): The postfix to be appended into norm abbreviation
            to create named layer.

    Returns:
        tuple[str, nn.Module]: The first element is the layer name consisting
        of abbreviation and postfix, e.g., bn1, gn. The second element is the
        created norm layer.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')
    cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')

    if inspect.isclass(layer_type):
        norm_layer = layer_type
    else:
        # Switch registry to the target scope. If `norm_layer` cannot be found
        # in the registry, fallback to search `norm_layer` in the
        # mmengine.MODELS.
        with MODELS.switch_scope_and_registry(None) as registry:
            norm_layer = registry.get(layer_type)
        if norm_layer is None:
            raise KeyError(f'Cannot find {layer_type} in registry under '
                           f'scope name {registry.scope}')
    abbr = infer_abbr(norm_layer)

    assert isinstance(postfix, (int, str))
    name = abbr + str(postfix)

    requires_grad = cfg_.pop('requires_grad', True)
    cfg_.setdefault('eps', 1e-5)
    if norm_layer is not nn.GroupNorm:
        layer = norm_layer(num_features, **cfg_)
        if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
            layer._specify_ddp_gpu_num(1)
    else:
        assert 'num_groups' in cfg_
        layer = norm_layer(num_channels=num_features, **cfg_)

    for param in layer.parameters():
        param.requires_grad = requires_grad

    return name, layer


def is_norm(layer: nn.Module,
            exclude: Union[type, tuple, None] = None) -> bool:
    """Check if a layer is a normalization layer.

    Args:
        layer (nn.Module): The layer to be checked.
        exclude (type | tuple[type]): Types to be excluded.

    Returns:
        bool: Whether the layer is a norm layer.
    """
    if exclude is not None:
        if not isinstance(exclude, tuple):
            exclude = (exclude, )
        if not is_tuple_of(exclude, type):
            raise TypeError(
                f'"exclude" must be either None or type or a tuple of types, '
                f'but got {type(exclude)}: {exclude}')

    if exclude and isinstance(layer, exclude):
        return False

    all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm)
    return isinstance(layer, all_norm_bases)
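A short build_norm_layer sketch (illustrative; the import path is assumed from this package layout): GN requires `num_groups` and receives `num_features` as `num_channels`, while `requires_grad` is stripped from the config and applied to the layer's parameters.

# Hedged sketch of the API defined above.
import torch.nn as nn
from mmcv.cnn import build_norm_layer

name, gn = build_norm_layer(dict(type='GN', num_groups=4), num_features=32)
assert name == 'gn' and isinstance(gn, nn.GroupNorm)

name, bn = build_norm_layer(dict(type='BN', requires_grad=False), 64, postfix=1)
assert name == 'bn1'
assert all(not p.requires_grad for p in bn.parameters())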
external/cv/mmcv/cnn/bricks/padding.py
ADDED
@@ -0,0 +1,48 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import inspect
from typing import Dict

import torch.nn as nn
from mmengine.registry import MODELS

MODELS.register_module('zero', module=nn.ZeroPad2d)
MODELS.register_module('reflect', module=nn.ReflectionPad2d)
MODELS.register_module('replicate', module=nn.ReplicationPad2d)


def build_padding_layer(cfg: Dict, *args, **kwargs) -> nn.Module:
    """Build padding layer.

    Args:
        cfg (dict): The padding layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate a padding layer.

    Returns:
        nn.Module: Created padding layer.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    cfg_ = cfg.copy()
    padding_type = cfg_.pop('type')
    if inspect.isclass(padding_type):
        return padding_type(*args, **kwargs, **cfg_)
    # Switch registry to the target scope. If `padding_layer` cannot be found
    # in the registry, fallback to search `padding_layer` in the
    # mmengine.MODELS.
    with MODELS.switch_scope_and_registry(None) as registry:
        padding_layer = registry.get(padding_type)
    if padding_layer is None:
        raise KeyError(f'Cannot find {padding_type} in registry under scope '
                       f'name {registry.scope}')
    layer = padding_layer(*args, **kwargs, **cfg_)

    return layer
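A build_padding_layer sketch (illustrative; import path assumed): positional args after `cfg` are forwarded to the layer constructor, so the `2` below becomes the padding size of `nn.ReflectionPad2d`.

# Hedged sketch of the API defined above.
import torch
from mmcv.cnn import build_padding_layer

pad = build_padding_layer(dict(type='reflect'), 2)  # -> nn.ReflectionPad2d(2)
x = torch.randn(1, 3, 8, 8)
assert pad(x).shape == (1, 3, 12, 12)  # 8 + 2 * 2 on each spatial dim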
external/cv/mmcv/cnn/bricks/plugin.py
ADDED
@@ -0,0 +1,106 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import inspect
import platform
from typing import Dict, Tuple, Union

import torch.nn as nn
from mmengine.registry import MODELS

if platform.system() == 'Windows':
    import regex as re  # type: ignore
else:
    import re  # type: ignore


def infer_abbr(class_type: type) -> str:
    """Infer abbreviation from the class name.

    This method will infer the abbreviation to map class types to
    abbreviations.

    Rule 1: If the class has the property "_abbr_", return the property.
    Rule 2: Otherwise, the abbreviation falls back to snake case of class
    name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``.

    Args:
        class_type (type): The plugin layer type.

    Returns:
        str: The inferred abbreviation.
    """

    def camel2snack(word):
        """Convert a camel case word into snake case.

        Modified from `inflection lib
        <https://inflection.readthedocs.io/en/latest/#inflection.underscore>`_.

        Example::

            >>> camel2snack("FancyBlock")
            'fancy_block'
        """

        word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word)
        word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word)
        word = word.replace('-', '_')
        return word.lower()

    if not inspect.isclass(class_type):
        raise TypeError(
            f'class_type must be a type, but got {type(class_type)}')
    if hasattr(class_type, '_abbr_'):
        return class_type._abbr_  # type: ignore
    else:
        return camel2snack(class_type.__name__)


def build_plugin_layer(cfg: Dict,
                       postfix: Union[int, str] = '',
                       **kwargs) -> Tuple[str, nn.Module]:
    """Build plugin layer.

    Args:
        cfg (dict): cfg should contain:

            - type (str): identify plugin layer type.
            - layer args: args needed to instantiate a plugin layer.
        postfix (int | str): appended into norm abbreviation to
            create named layer. Default: ''.

    Returns:
        tuple[str, nn.Module]: The first one is the concatenation of
        abbreviation and postfix. The second is the created plugin layer.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')
    cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')
    if inspect.isclass(layer_type):
        plugin_layer = layer_type
    else:
        # Switch registry to the target scope. If `plugin_layer` cannot be
        # found in the registry, fallback to search `plugin_layer` in the
        # mmengine.MODELS.
        with MODELS.switch_scope_and_registry(None) as registry:
            plugin_layer = registry.get(layer_type)
        if plugin_layer is None:
            raise KeyError(
                f'Cannot find {layer_type} in registry under scope '
                f'name {registry.scope}')
    abbr = infer_abbr(plugin_layer)

    assert isinstance(postfix, (int, str))
    name = abbr + str(postfix)

    layer = plugin_layer(**kwargs, **cfg_)

    return name, layer
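A build_plugin_layer sketch (illustrative; import path assumed): `NonLocal2d` above defines `_abbr_ = 'nonlocal_block'`, so `infer_abbr` uses that property and the postfix is appended to form the layer name.

# Hedged sketch of the API defined above.
from mmcv.cnn import build_plugin_layer

name, layer = build_plugin_layer(
    dict(type='NonLocal2d', in_channels=16), postfix='_1')
assert name == 'nonlocal_block_1'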