Spaces:

ljsabc
/

AnimeIns_CPU

Runtime error

App Files Files Community

ljsabc committed on Dec 5, 2023

Commit

8aa4f1e

1 Parent(s): 395d300

add utils

Browse files

Files changed (10) hide show

utils/__init__.py +0 -0
utils/booru_tagger.py +116 -0
utils/constants.py +82 -0
utils/cupy_utils.py +122 -0
utils/effects.py +182 -0
utils/env_utils.py +65 -0
utils/helper_math.h +1449 -0
utils/io_utils.py +473 -0
utils/logger.py +20 -0
utils/mmdet_custom_hooks.py +223 -0

utils/__init__.py ADDED Viewed

File without changes

utils/booru_tagger.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import os
+import gc
+import pandas as pd
+import numpy as np
+from onnxruntime import InferenceSession
+from typing import Tuple, List, Dict
+from io import BytesIO
+from PIL import Image
+import cv2
+from pathlib import Path
+from tqdm import tqdm
+def make_square(img, target_size):
+    old_size = img.shape[:2]
+    desired_size = max(old_size)
+    desired_size = max(desired_size, target_size)
+    delta_w = desired_size - old_size[1]
+    delta_h = desired_size - old_size[0]
+    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
+    left, right = delta_w // 2, delta_w - (delta_w // 2)
+    color = [255, 255, 255]
+    new_im = cv2.copyMakeBorder(
+        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
+    )
+    return new_im
+def smart_resize(img, size):
+    # Assumes the image has already gone through make_square
+    if img.shape[0] > size:
+        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
+    elif img.shape[0] < size:
+        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+    return img
+class Tagger :
+    def __init__(self, filename) -> None:
+        self.model = InferenceSession(filename, providers=['CUDAExecutionProvider'])
+        [root, _] = os.path.split(filename)
+        self.tags = pd.read_csv(os.path.join(root, 'selected_tags.csv') if root else 'selected_tags.csv')
+        _, self.height, _, _ = self.model.get_inputs()[0].shape
+        characters = self.tags.loc[self.tags['category'] == 4]
+        self.characters = set(characters['name'].values.tolist())
+    def label(self, image: Image) -> Dict[str, float] :
+        # alpha to white
+        image = image.convert('RGBA')
+        new_image = Image.new('RGBA', image.size, 'WHITE')
+        new_image.paste(image, mask=image)
+        image = new_image.convert('RGB')
+        image = np.asarray(image)
+        # PIL RGB to OpenCV BGR
+        image = image[:, :, ::-1]
+        image = make_square(image, self.height)
+        image = smart_resize(image, self.height)
+        image = image.astype(np.float32)
+        image = np.expand_dims(image, 0)
+        # evaluate model
+        input_name = self.model.get_inputs()[0].name
+        label_name = self.model.get_outputs()[0].name
+        confidents = self.model.run([label_name], {input_name: image})[0]
+        tags = self.tags[:][['name']]
+        tags['confidents'] = confidents[0]
+        # first 4 items are for rating (general, sensitive, questionable, explicit)
+        ratings = dict(tags[:4].values)
+        # rest are regular tags
+        tags = dict(tags[4:].values)
+        tags = {t: v for t, v in tags.items() if v > 0.5}
+        return tags
+    def label_cv2_bgr(self, image: np.ndarray) -> Dict[str, float] :
+        # image in BGR u8
+        image = make_square(image, self.height)
+        image = smart_resize(image, self.height)
+        image = image.astype(np.float32)
+        image = np.expand_dims(image, 0)
+        # evaluate model
+        input_name = self.model.get_inputs()[0].name
+        label_name = self.model.get_outputs()[0].name
+        confidents = self.model.run([label_name], {input_name: image})[0]
+        tags = self.tags[:][['name']]
+        cats = self.tags[:][['category']]
+        tags['confidents'] = confidents[0]
+        # first 4 items are for rating (general, sensitive, questionable, explicit)
+        ratings = dict(tags[:4].values)
+        # rest are regular tags
+        tags = dict(tags[4:].values)
+        tags = [t for t, v in tags.items() if v > 0.5]
+        character_str = []
+        for t in tags:
+            if t in self.characters:
+                character_str.append(t)
+        return tags, character_str
+# Manual smoke test: constructing a Tagger loads the ONNX model and the tag CSV next to it.
+if __name__ == '__main__':
+    modelp = r'models/wd-v1-4-swinv2-tagger-v2/model.onnx'
+    tagger = Tagger(modelp)

utils/constants.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import torch
+# Single-entry category table: one category with id 0 named "object".
+CATEGORIES = [
+    {"id": 0, "name": "object", "isthing": 1}
+]
+# NOTE(review): presumably the zero-padded width used when formatting image ids;
+# no use of this constant is visible in this file — confirm against callers.
+IMAGE_ID_ZFILL = 12
+# Fixed table of 133 three-component colour tuples (values 0-255).
+# NOTE(review): channel order (RGB vs BGR) is not established by this file — confirm at the call site.
+COLOR_PALETTE = [
+    (220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230), (106, 0, 228),
+    (0, 60, 100), (0, 80, 100), (0, 0, 70), (0, 0, 192), (250, 170, 30),
+    (100, 170, 30), (220, 220, 0), (175, 116, 175), (250, 0, 30),
+    (165, 42, 42), (255, 77, 255), (0, 226, 252), (182, 182, 255),
+    (0, 82, 0), (120, 166, 157), (110, 76, 0), (174, 57, 255),
+    (199, 100, 0), (72, 0, 118), (255, 179, 240), (0, 125, 92),
+    (209, 0, 151), (188, 208, 182), (0, 220, 176), (255, 99, 164),
+    (92, 0, 73), (133, 129, 255), (78, 180, 255), (0, 228, 0),
+    (174, 255, 243), (45, 89, 255), (134, 134, 103), (145, 148, 174),
+    (255, 208, 186), (197, 226, 255), (171, 134, 1), (109, 63, 54),
+    (207, 138, 255), (151, 0, 95), (9, 80, 61), (84, 105, 51),
+    (74, 65, 105), (166, 196, 102), (208, 195, 210), (255, 109, 65),
+    (0, 143, 149), (179, 0, 194), (209, 99, 106), (5, 121, 0),
+    (227, 255, 205), (147, 186, 208), (153, 69, 1), (3, 95, 161),
+    (163, 255, 0), (119, 0, 170), (0, 182, 199), (0, 165, 120),
+    (183, 130, 88), (95, 32, 0), (130, 114, 135), (110, 129, 133),
+    (166, 74, 118), (219, 142, 185), (79, 210, 114), (178, 90, 62),
+    (65, 70, 15), (127, 167, 115), (59, 105, 106), (142, 108, 45),
+    (196, 172, 0), (95, 54, 80), (128, 76, 255), (201, 57, 1),
+    (246, 0, 122), (191, 162, 208), (255, 255, 128), (147, 211, 203),
+    (150, 100, 100), (168, 171, 172), (146, 112, 198), (210, 170, 100),
+    (92, 136, 89), (218, 88, 184), (241, 129, 0), (217, 17, 255),
+    (124, 74, 181), (70, 70, 70), (255, 228, 255), (154, 208, 0),
+    (193, 0, 92), (76, 91, 113), (255, 180, 195), (106, 154, 176),
+    (230, 150, 140), (60, 143, 255), (128, 64, 128), (92, 82, 55),
+    (254, 212, 124), (73, 77, 174), (255, 160, 98), (255, 255, 255),
+    (104, 84, 109), (169, 164, 131), (225, 199, 255), (137, 54, 74),
+    (135, 158, 223), (7, 246, 231), (107, 255, 200), (58, 41, 149),
+    (183, 121, 142), (255, 73, 97), (107, 142, 35), (190, 153, 153),
+    (146, 139, 141), (70, 130, 180), (134, 199, 156), (209, 226, 140),
+    (96, 36, 108), (96, 96, 96), (64, 170, 64), (152, 251, 152),
+    (208, 229, 228), (206, 186, 171), (152, 161, 64), (116, 112, 0),
+    (0, 114, 143), (102, 102, 156), (250, 141, 255)
+]
+class Colors:
+    # Ultralytics color palette https://ultralytics.com/
+    def __init__(self):
+        # hex = matplotlib.colors.TABLEAU_COLORS.values()
+        hexs = ('FF1010', '10FF10', 'FFF010', '100FFF', '0018EC', 'FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
+                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
+        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
+        self.n = len(self.palette)
+    def __call__(self, i, bgr=True):
+        c = self.palette[int(i) % self.n]
+        return (c[2], c[1], c[0]) if bgr else c
+    @staticmethod
+    def hex2rgb(h):  # rgb order (PIL)
+        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
+colors = Colors()
+def get_color(idx):
+    if idx == -1:
+        return 255
+    else:
+        return colors(idx)
+MULTIPLE_TAGS = {'2girls', '3girls', '4girls', '5girls', '6+girls', 'multiple_girls',
+'2boys', '3boys', '4boys', '5boys', '6+boys', 'multiple_boys',
+'2others', '3others', '4others', '5others', '6+others', 'multiple_others'}
+if hasattr(torch, 'cuda'):
+    DEFAULT_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+else:
+    DEFAULT_DEVICE = 'cpu'
+DEFAULT_DETECTOR_CKPT = 'models/AnimeInstanceSegmentation/rtmdetl_e60.ckpt'
+DEFAULT_DEPTHREFINE_CKPT = 'models/AnimeInstanceSegmentation/kenburns_depth_refinenet.ckpt'
+DEFAULT_INPAINTNET_CKPT = 'models/AnimeInstanceSegmentation/kenburns_inpaintnet.ckpt'
+DEPTH_ZOE_CKPT = 'models/AnimeInstanceSegmentation/ZoeD_M12_N.pt'

utils/cupy_utils.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import re
+import os
+import cupy
+import os.path as osp
+import torch
+@cupy.memoize(for_each_device=True)
+def launch_kernel(strFunction, strKernel):
+    if 'CUDA_HOME' not in os.environ:
+        os.environ['CUDA_HOME'] = cupy.cuda.get_cuda_path()
+    # end
+    # , options=tuple([ '-I ' + os.environ['CUDA_HOME'], '-I ' + os.environ['CUDA_HOME'] + '/include' ])
+    return cupy.RawKernel(strKernel, strFunction)
+def preprocess_kernel(strKernel, objVariables):
+    path_to_math_helper = osp.join(osp.dirname(osp.abspath(__file__)), 'helper_math.h')
+    strKernel = '''
+        #include <{{HELPER_PATH}}>
+        __device__ __forceinline__ float atomicMin(const float* buffer, float dblValue) {
+            int intValue = __float_as_int(*buffer);
+            while (__int_as_float(intValue) > dblValue) {
+                intValue = atomicCAS((int*) (buffer), intValue, __float_as_int(dblValue));
+            }
+            return __int_as_float(intValue);
+        }
+        __device__ __forceinline__ float atomicMax(const float* buffer, float dblValue) {
+            int intValue = __float_as_int(*buffer);
+            while (__int_as_float(intValue) < dblValue) {
+                intValue = atomicCAS((int*) (buffer), intValue, __float_as_int(dblValue));
+            }
+            return __int_as_float(intValue);
+        }
+    '''.replace('{{HELPER_PATH}}', path_to_math_helper) + strKernel
+    # end
+    for strVariable in objVariables:
+        objValue = objVariables[strVariable]
+        if type(objValue) == int:
+            strKernel = strKernel.replace('{{' + strVariable + '}}', str(objValue))
+        elif type(objValue) == float:
+            strKernel = strKernel.replace('{{' + strVariable + '}}', str(objValue))
+        elif type(objValue) == str:
+            strKernel = strKernel.replace('{{' + strVariable + '}}', objValue)
+        # end
+    # end
+    while True:
+        objMatch = re.search('(SIZE_)([0-4])(\()([^\)]*)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArg = int(objMatch.group(2))
+        strTensor = objMatch.group(4)
+        intSizes = objVariables[strTensor].size()
+        strKernel = strKernel.replace(objMatch.group(), str(intSizes[intArg] if torch.is_tensor(intSizes[intArg]) == False else intSizes[intArg].item()))
+    # end
+    while True:
+        objMatch = re.search('(STRIDE_)([0-4])(\()([^\)]*)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArg = int(objMatch.group(2))
+        strTensor = objMatch.group(4)
+        intStrides = objVariables[strTensor].stride()
+        strKernel = strKernel.replace(objMatch.group(), str(intStrides[intArg] if torch.is_tensor(intStrides[intArg]) == False else intStrides[intArg].item()))
+    # end
+    while True:
+        objMatch = re.search('(OFFSET_)([0-4])(\()([^\)]+)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArgs = int(objMatch.group(2))
+        strArgs = objMatch.group(4).split(',')
+        strTensor = strArgs[0]
+        intStrides = objVariables[strTensor].stride()
+        strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg] if torch.is_tensor(intStrides[intArg]) == False else intStrides[intArg].item()) + ')' for intArg in range(intArgs) ]
+        strKernel = strKernel.replace(objMatch.group(0), '(' + str.join('+', strIndex) + ')')
+    # end
+    while True:
+        objMatch = re.search('(VALUE_)([0-4])(\()([^\)]+)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArgs = int(objMatch.group(2))
+        strArgs = objMatch.group(4).split(',')
+        strTensor = strArgs[0]
+        intStrides = objVariables[strTensor].stride()
+        strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg] if torch.is_tensor(intStrides[intArg]) == False else intStrides[intArg].item()) + ')' for intArg in range(intArgs) ]
+        strKernel = strKernel.replace(objMatch.group(0), strTensor + '[' + str.join('+', strIndex) + ']')
+    # end
+    return strKernel

utils/effects.py ADDED Viewed

	@@ -0,0 +1,182 @@

+from numba import jit, njit
+import numpy as np
+import time
+import cv2
+import math
+from pathlib import Path
+import os.path as osp
+import torch
+from .cupy_utils import launch_kernel, preprocess_kernel
+import cupy
+def bokeh_filter_cupy(img, depth, dx, dy, im_h, im_w, num_samples=32):
+    blurred = img.clone()
+    n = im_h * im_w
+    str_kernel = '''
+        extern "C" __global__ void kernel_bokeh(
+            const int n,
+            const int h,
+            const int w,
+            const int nsamples,
+            const float dx,
+            const float dy,
+            const float* img,
+            const float* depth,
+            float* blurred
+        ) {
+            const int im_size = min(h, w);
+            const int sample_offset = nsamples / 2;
+            for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n * 3; intIndex += blockDim.x * gridDim.x) {
+                const int intSample = intIndex / 3;
+                const int c = intIndex % 3;
+                const int y = ( intSample / w) % h;
+                const int x = intSample % w;
+                const int flatten_xy = y * w + x;
+                const int fid = flatten_xy * 3 + c;
+                const float d = depth[flatten_xy];
+                const float _dx = dx * d;
+                const float _dy = dy * d;
+                float weight = 0;
+                float color = 0;
+                for (int s = 0; s < nsamples; s += 1) {
+                    const int sp = (s - sample_offset) * im_size;
+                    const int x_ = x + int(round(_dx * sp));
+                    const int y_ = y + int(round(_dy * sp));
+                    if ((x_ >= w) | (y_ >= h) | (x_ < 0) | (y_ < 0))
+                        continue;
+                    const int flatten_xy_ = y_ * w + x_;
+                    const float w_ = depth[flatten_xy_];
+                    weight += w_;
+                    const int fid_ = flatten_xy_ * 3 + c;
+                    color += img[fid_] * w_;
+                }
+                if (weight != 0) {
+                    color /= weight;
+                }
+                else {
+                    color = img[fid];
+                }
+                blurred[fid] = color;
+            }
+        }
+    '''
+    launch_kernel('kernel_bokeh', str_kernel)(
+        grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+        block=tuple([ 512, 1, 1 ]),
+        args=[ cupy.int32(n), cupy.int32(im_h), cupy.int32(im_w), \
+              cupy.int32(num_samples), cupy.float32(dx), cupy.float32(dy),
+              img.data_ptr(), depth.data_ptr(), blurred.data_ptr() ]
+    )
+    return blurred
+def np2flatten_tensor(arr: np.ndarray, to_cuda: bool = True) -> torch.Tensor:
+    c = 1
+    if len(arr.shape) == 3:
+        c = arr.shape[2]
+    else:
+        arr = arr[..., None]
+    arr = arr.transpose((2, 0, 1))[None, ...]
+    t = torch.from_numpy(arr).view(1, c, -1)
+    if to_cuda:
+        t = t.cuda()
+    return t
+def ftensor2img(t: torch.Tensor, im_h, im_w):
+    t = t.detach().cpu().numpy().squeeze()
+    c = t.shape[0]
+    t = t.transpose((1, 0)).reshape((im_h, im_w, c))
+    return t
+@njit
+def bokeh_filter(img, depth, dx, dy, num_samples=32):
+    """Run one directional, depth-weighted blur pass on the CPU (numba-compiled).
+
+    For every pixel, ``num_samples`` positions are sampled along the direction
+    ``(dx, dy)`` scaled by the depth value at that pixel. Each in-bounds sample
+    is weighted by the depth value at the sample position, and the pixel
+    becomes the weighted mean of the samples. When the total weight is zero
+    the source pixel is copied unchanged.
+
+    NOTE(review): the accumulator is a 3-element float32 array, so ``img`` is
+    presumably an (H, W, 3) float32 image — confirm against the caller.
+    """
+    sample_offset = num_samples // 2
+    # _scale = 0.0005
+    # depth = depth * _scale
+    im_h, im_w = img.shape[0], img.shape[1]
+    # Sample spacing is expressed relative to the shorter image side
+    im_size = min(im_h, im_w)
+    blured = np.zeros_like(img)
+    for x in range(im_w):
+        for y in range(im_h):
+            d = depth[y, x]
+            _color = np.array([0, 0, 0], dtype=np.float32)
+            # Per-pixel step: the direction scaled by this pixel's depth value
+            _dx = dx * d
+            _dy = dy * d
+            weight = 0
+            for s in range(num_samples):
+                # Centre the sample index around zero, then scale to pixel units
+                s = (s - sample_offset) * im_size
+                x_ = x + int(round(_dx * s))
+                y_ = y + int(round(_dy * s))
+                # Samples that fall outside the image are skipped
+                if x_ >= im_w or y_ >= im_h or x_ < 0 or y_ < 0:
+                    continue
+                _w = depth[y_, x_]
+                weight += _w
+                _color += img[y_, x_] * _w
+            if weight == 0:
+                # Nothing contributed: keep the source pixel to avoid dividing by zero
+                blured[y, x] = img[y, x]
+            else:
+                blured[y, x] = _color / np.array([weight, weight, weight], dtype=np.float32)
+    return blured
+def bokeh_blur(img, depth, num_samples=32, lightness_factor=10, depth_factor=2, use_cuda=False, focal_plane=None):
+    img = np.ascontiguousarray(img)
+    if depth is not None:
+        depth = depth.astype(np.float32)
+        if focal_plane is not None:
+            depth = depth.max() - np.abs(depth - focal_plane)
+        if depth_factor != 1:
+            depth = np.power(depth, depth_factor)
+        depth = depth - depth.min()
+        depth = depth.astype(np.float32) / depth.max()
+        depth = 1 - depth
+    img = img.astype(np.float32) / 255
+    img_hightlighted = np.power(img, lightness_factor)
+    # img =
+    im_h, im_w = img.shape[:2]
+    PI = math.pi
+    _scale = 0.0005
+    depth = depth * _scale
+    if use_cuda:
+        img_hightlighted = np2flatten_tensor(img_hightlighted, True)
+        depth = np2flatten_tensor(depth, True)
+        vertical_blured = bokeh_filter_cupy(img_hightlighted, depth, 0, 1, im_h, im_w, num_samples)
+        diag_blured = bokeh_filter_cupy(vertical_blured, depth, math.cos(-PI/6), math.sin(-PI/6), im_h, im_w, num_samples)
+        rhom_blur = bokeh_filter_cupy(diag_blured, depth, math.cos(-PI * 5 /6), math.sin(-PI * 5 /6), im_h, im_w, num_samples)
+        blured = (diag_blured + rhom_blur) / 2
+        blured = ftensor2img(blured, im_h, im_w)
+    else:
+        vertical_blured = bokeh_filter(img_hightlighted, depth, 0, 1, num_samples)
+        diag_blured = bokeh_filter(vertical_blured, depth, math.cos(-PI/6), math.sin(-PI/6), num_samples)
+        rhom_blur = bokeh_filter(diag_blured, depth, math.cos(-PI * 5 /6), math.sin(-PI * 5 /6), num_samples)
+        blured = (diag_blured + rhom_blur) / 2
+    blured = np.power(blured, 1 / lightness_factor)
+    blured = (blured * 255).astype(np.uint8)
+    return blured

utils/env_utils.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import os
+import platform
+import warnings
+import torch.multiprocessing as mp
+def set_multi_processing(
+    mp_start_method: str = "fork", opencv_num_threads: int = 0, distributed: bool = True
+) -> None:
+    """Set multi-processing related environment.
+    This function is refered from https://github.com/open-mmlab/mmengine/blob/main/mmengine/utils/dl_utils/setup_env.py
+    Args:
+        mp_start_method (str): Set the method which should be used to start
+            child processes. Defaults to 'fork'.
+        opencv_num_threads (int): Number of threads for opencv.
+            Defaults to 0.
+        distributed (bool): True if distributed environment.
+            Defaults to False.
+    """  # noqa
+    # set multi-process start method as `fork` to speed up the training
+    if platform.system() != "Windows":
+        current_method = mp.get_start_method(allow_none=True)
+        if current_method is not None and current_method != mp_start_method:
+            warnings.warn(
+                f"Multi-processing start method `{mp_start_method}` is "
+                f"different from the previous setting `{current_method}`."
+                f"It will be force set to `{mp_start_method}`. You can "
+                "change this behavior by changing `mp_start_method` in "
+                "your config."
+            )
+        mp.set_start_method(mp_start_method, force=True)
+    try:
+        import cv2
+        # disable opencv multithreading to avoid system being overloaded
+        cv2.setNumThreads(opencv_num_threads)
+    except ImportError:
+        pass
+    # setup OMP threads
+    # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py  # noqa
+    if "OMP_NUM_THREADS" not in os.environ and distributed:
+        omp_num_threads = 1
+        warnings.warn(
+            "Setting OMP_NUM_THREADS environment variable for each process"
+            f" to be {omp_num_threads} in default, to avoid your system "
+            "being overloaded, please further tune the variable for "
+            "optimal performance in your application as needed."
+        )
+        os.environ["OMP_NUM_THREADS"] = str(omp_num_threads)
+    # # setup MKL threads
+    if "MKL_NUM_THREADS" not in os.environ and distributed:
+        mkl_num_threads = 1
+        warnings.warn(
+            "Setting MKL_NUM_THREADS environment variable for each process"
+            f" to be {mkl_num_threads} in default, to avoid your system "
+            "being overloaded, please further tune the variable for "
+            "optimal performance in your application as needed."
+        )
+        os.environ["MKL_NUM_THREADS"] = str(mkl_num_threads)

utils/helper_math.h ADDED Viewed

	@@ -0,0 +1,1449 @@

+/**
+ * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+/*
+ *  This file implements common mathematical operations on vector types
+ *  (float3, float4 etc.) since these are not provided as standard by CUDA.
+ *
+ *  The syntax is modeled on the Cg standard library.
+ *
+ *  This is part of the Helper library includes
+ *
+ *    Thanks to Linh Hah for additions and fixes.
+ */
+#ifndef HELPER_MATH_H
+#define HELPER_MATH_H
+#include "cuda_runtime.h"
+// Short aliases for unsigned scalar types; `uint` is used by the uint2/uint3/uint4 helpers below.
+typedef unsigned int uint;
+typedef unsigned short ushort;
+#ifndef __CUDACC__
+#include <math.h>
+////////////////////////////////////////////////////////////////////////////////
+// host implementations of CUDA functions
+////////////////////////////////////////////////////////////////////////////////
+// The functions below are compiled only when __CUDACC__ is not defined
+// (they sit inside the surrounding #ifndef __CUDACC__ block).
+// Smaller of two floats.
+inline float fminf(float a, float b)
+{
+    return a < b ? a : b;
+}
+// Larger of two floats.
+inline float fmaxf(float a, float b)
+{
+    return a > b ? a : b;
+}
+// Larger of two ints.
+inline int max(int a, int b)
+{
+    return a > b ? a : b;
+}
+// Smaller of two ints.
+inline int min(int a, int b)
+{
+    return a < b ? a : b;
+}
+// Reciprocal square root, computed as 1 / sqrtf(x).
+inline float rsqrtf(float x)
+{
+    return 1.0f / sqrtf(x);
+}
+#endif
+////////////////////////////////////////////////////////////////////////////////
+// constructors
+////////////////////////////////////////////////////////////////////////////////
+// make_float2 overloads: broadcast one scalar to both components, keep the
+// x/y of a float3 (z is dropped), or convert an int2/uint2 component-wise.
+// The two-argument make_float2 they forward to is not defined in this file
+// (presumably supplied by the CUDA headers pulled in through cuda_runtime.h).
+inline __host__ __device__ float2 make_float2(float s)
+{
+    return make_float2(s, s);
+}
+inline __host__ __device__ float2 make_float2(float3 a)
+{
+    return make_float2(a.x, a.y);
+}
+inline __host__ __device__ float2 make_float2(int2 a)
+{
+    return make_float2(float(a.x), float(a.y));
+}
+inline __host__ __device__ float2 make_float2(uint2 a)
+{
+    return make_float2(float(a.x), float(a.y));
+}
+// make_int2 overloads: broadcast one scalar, keep the x/y of an int3 (z is
+// dropped), or convert a uint2/float2 component-wise with an int(...) cast.
+inline __host__ __device__ int2 make_int2(int s)
+{
+    return make_int2(s, s);
+}
+inline __host__ __device__ int2 make_int2(int3 a)
+{
+    return make_int2(a.x, a.y);
+}
+inline __host__ __device__ int2 make_int2(uint2 a)
+{
+    return make_int2(int(a.x), int(a.y));
+}
+inline __host__ __device__ int2 make_int2(float2 a)
+{
+    return make_int2(int(a.x), int(a.y));
+}
+// make_uint2 overloads: broadcast one scalar, keep the x/y of a uint3 (z is
+// dropped), or convert an int2 component-wise with a uint(...) cast.
+inline __host__ __device__ uint2 make_uint2(uint s)
+{
+    return make_uint2(s, s);
+}
+inline __host__ __device__ uint2 make_uint2(uint3 a)
+{
+    return make_uint2(a.x, a.y);
+}
+inline __host__ __device__ uint2 make_uint2(int2 a)
+{
+    return make_uint2(uint(a.x), uint(a.y));
+}
+// make_float3 overloads: broadcast one scalar, extend a float2 (z = 0.0f or
+// the given s), keep the x/y/z of a float4 (w is dropped), or convert an
+// int3/uint3 component-wise.
+inline __host__ __device__ float3 make_float3(float s)
+{
+    return make_float3(s, s, s);
+}
+inline __host__ __device__ float3 make_float3(float2 a)
+{
+    return make_float3(a.x, a.y, 0.0f);
+}
+inline __host__ __device__ float3 make_float3(float2 a, float s)
+{
+    return make_float3(a.x, a.y, s);
+}
+inline __host__ __device__ float3 make_float3(float4 a)
+{
+    return make_float3(a.x, a.y, a.z);
+}
+inline __host__ __device__ float3 make_float3(int3 a)
+{
+    return make_float3(float(a.x), float(a.y), float(a.z));
+}
+inline __host__ __device__ float3 make_float3(uint3 a)
+{
+    return make_float3(float(a.x), float(a.y), float(a.z));
+}
+// make_int3 overloads: broadcast one scalar, extend an int2 (z = 0 or the
+// given s), or convert a uint3/float3 component-wise with an int(...) cast.
+inline __host__ __device__ int3 make_int3(int s)
+{
+    return make_int3(s, s, s);
+}
+inline __host__ __device__ int3 make_int3(int2 a)
+{
+    return make_int3(a.x, a.y, 0);
+}
+inline __host__ __device__ int3 make_int3(int2 a, int s)
+{
+    return make_int3(a.x, a.y, s);
+}
+inline __host__ __device__ int3 make_int3(uint3 a)
+{
+    return make_int3(int(a.x), int(a.y), int(a.z));
+}
+inline __host__ __device__ int3 make_int3(float3 a)
+{
+    return make_int3(int(a.x), int(a.y), int(a.z));
+}
+// make_uint3 overloads: broadcast one scalar, extend a uint2 (z = 0 or the
+// given s), keep the x/y/z of a uint4 (w is dropped), or convert an int3
+// component-wise with a uint(...) cast.
+inline __host__ __device__ uint3 make_uint3(uint s)
+{
+    return make_uint3(s, s, s);
+}
+inline __host__ __device__ uint3 make_uint3(uint2 a)
+{
+    return make_uint3(a.x, a.y, 0);
+}
+inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
+{
+    return make_uint3(a.x, a.y, s);
+}
+inline __host__ __device__ uint3 make_uint3(uint4 a)
+{
+    return make_uint3(a.x, a.y, a.z);
+}
+inline __host__ __device__ uint3 make_uint3(int3 a)
+{
+    return make_uint3(uint(a.x), uint(a.y), uint(a.z));
+}
+// make_float4 overloads: broadcast one scalar, extend a float3 (w = 0.0f or
+// the given w), or convert an int4/uint4 component-wise.
+inline __host__ __device__ float4 make_float4(float s)
+{
+    return make_float4(s, s, s, s);
+}
+inline __host__ __device__ float4 make_float4(float3 a)
+{
+    return make_float4(a.x, a.y, a.z, 0.0f);
+}
+inline __host__ __device__ float4 make_float4(float3 a, float w)
+{
+    return make_float4(a.x, a.y, a.z, w);
+}
+inline __host__ __device__ float4 make_float4(int4 a)
+{
+    return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
+}
+inline __host__ __device__ float4 make_float4(uint4 a)
+{
+    return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
+}
+// make_int4 overloads: broadcast one scalar, extend an int3 (w = 0 or the
+// given w), or convert a uint4/float4 component-wise with an int(...) cast.
+inline __host__ __device__ int4 make_int4(int s)
+{
+    return make_int4(s, s, s, s);
+}
+inline __host__ __device__ int4 make_int4(int3 a)
+{
+    return make_int4(a.x, a.y, a.z, 0);
+}
+inline __host__ __device__ int4 make_int4(int3 a, int w)
+{
+    return make_int4(a.x, a.y, a.z, w);
+}
+inline __host__ __device__ int4 make_int4(uint4 a)
+{
+    return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
+}
+inline __host__ __device__ int4 make_int4(float4 a)
+{
+    return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
+}
+// make_uint4 overloads: broadcast one scalar, extend a uint3 (w = 0 or the
+// given w), or convert an int4 component-wise with a uint(...) cast.
+inline __host__ __device__ uint4 make_uint4(uint s)
+{
+    return make_uint4(s, s, s, s);
+}
+inline __host__ __device__ uint4 make_uint4(uint3 a)
+{
+    return make_uint4(a.x, a.y, a.z, 0);
+}
+inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
+{
+    return make_uint4(a.x, a.y, a.z, w);
+}
+inline __host__ __device__ uint4 make_uint4(int4 a)
+{
+    return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// negate
+////////////////////////////////////////////////////////////////////////////////
+// Unary minus for the signed vector types: returns a new vector with every
+// component negated. The operand is taken by const reference so that
+// temporaries and const values can be negated too (e.g. -(a + b)); a
+// non-const reference parameter cannot bind to them. Existing calls on
+// non-const lvalues keep working unchanged.
+inline __host__ __device__ float2 operator-(const float2 &a)
+{
+    return make_float2(-a.x, -a.y);
+}
+inline __host__ __device__ int2 operator-(const int2 &a)
+{
+    return make_int2(-a.x, -a.y);
+}
+inline __host__ __device__ float3 operator-(const float3 &a)
+{
+    return make_float3(-a.x, -a.y, -a.z);
+}
+inline __host__ __device__ int3 operator-(const int3 &a)
+{
+    return make_int3(-a.x, -a.y, -a.z);
+}
+inline __host__ __device__ float4 operator-(const float4 &a)
+{
+    return make_float4(-a.x, -a.y, -a.z, -a.w);
+}
+inline __host__ __device__ int4 operator-(const int4 &a)
+{
+    return make_int4(-a.x, -a.y, -a.z, -a.w);
+}
+////////////////////////////////////////////////////////////////////////////////
+// addition
+////////////////////////////////////////////////////////////////////////////////
+// float2 addition: vector + vector, vector + scalar (either operand order),
+// and the in-place += forms. A scalar is added to every component.
+inline __host__ __device__ float2 operator+(float2 a, float2 b)
+{
+    return make_float2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(float2 &a, float2 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ float2 operator+(float2 a, float b)
+{
+    return make_float2(a.x + b, a.y + b);
+}
+inline __host__ __device__ float2 operator+(float b, float2 a)
+{
+    return make_float2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(float2 &a, float b)
+{
+    a.x += b;
+    a.y += b;
+}
+// int2 addition: vector + vector, vector + scalar (either operand order),
+// and the in-place += forms. A scalar is added to every component.
+inline __host__ __device__ int2 operator+(int2 a, int2 b)
+{
+    return make_int2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(int2 &a, int2 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ int2 operator+(int2 a, int b)
+{
+    return make_int2(a.x + b, a.y + b);
+}
+inline __host__ __device__ int2 operator+(int b, int2 a)
+{
+    return make_int2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(int2 &a, int b)
+{
+    a.x += b;
+    a.y += b;
+}
+// uint2 addition: vector + vector, vector + scalar (either operand order),
+// and the in-place += forms. A scalar is added to every component.
+inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
+{
+    return make_uint2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ uint2 operator+(uint2 a, uint b)
+{
+    return make_uint2(a.x + b, a.y + b);
+}
+inline __host__ __device__ uint2 operator+(uint b, uint2 a)
+{
+    return make_uint2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(uint2 &a, uint b)
+{
+    a.x += b;
+    a.y += b;
+}
+// float3 addition: vector + vector, vector + scalar, and the in-place +=
+// forms. The scalar-first overload (float + float3) is defined further down,
+// after the uint3 operators.
+inline __host__ __device__ float3 operator+(float3 a, float3 b)
+{
+    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(float3 &a, float3 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ float3 operator+(float3 a, float b)
+{
+    return make_float3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(float3 &a, float b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+// int3 addition: vector + vector, vector + scalar, and the in-place +=
+// forms. The scalar-first overload (int + int3) is defined further down,
+// after the uint3 operators.
+inline __host__ __device__ int3 operator+(int3 a, int3 b)
+{
+    return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(int3 &a, int3 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ int3 operator+(int3 a, int b)
+{
+    return make_int3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(int3 &a, int b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+// uint3 addition: vector + vector, vector + scalar, and the in-place +=
+// forms. The scalar-first overload (uint + uint3) follows right after this group.
+inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
+{
+    return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ uint3 operator+(uint3 a, uint b)
+{
+    return make_uint3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(uint3 &a, uint b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+// Scalar-first addition for the 3-component types (scalar + vector): the
+// scalar is added to every component, same result as vector + scalar.
+inline __host__ __device__ int3 operator+(int b, int3 a)
+{
+    return make_int3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ uint3 operator+(uint b, uint3 a)
+{
+    return make_uint3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ float3 operator+(float b, float3 a)
+{
+    return make_float3(a.x + b, a.y + b, a.z + b);
+}
+// float4 addition: vector + vector, vector + scalar (either operand order),
+// and the in-place += forms. A scalar is added to every component.
+inline __host__ __device__ float4 operator+(float4 a, float4 b)
+{
+    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
+}
+inline __host__ __device__ void operator+=(float4 &a, float4 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ float4 operator+(float4 a, float b)
+{
+    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ float4 operator+(float b, float4 a)
+{
+    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ void operator+=(float4 &a, float b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+inline __host__ __device__ int4 operator+(int4 a, int4 b)
+{
+    return make_int4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
+}
+inline __host__ __device__ void operator+=(int4 &a, int4 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ int4 operator+(int4 a, int b)
+{
+    return make_int4(a.x + b, a.y + b, a.z + b,  a.w + b);
+}
+inline __host__ __device__ int4 operator+(int b, int4 a)
+{
+    return make_int4(a.x + b, a.y + b, a.z + b,  a.w + b);
+}
+inline __host__ __device__ void operator+=(int4 &a, int b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
+{
+    return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
+}
+inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ uint4 operator+(uint4 a, uint b)
+{
+    return make_uint4(a.x + b, a.y + b, a.z + b,  a.w + b);
+}
+inline __host__ __device__ uint4 operator+(uint b, uint4 a)
+{
+    return make_uint4(a.x + b, a.y + b, a.z + b,  a.w + b);
+}
+inline __host__ __device__ void operator+=(uint4 &a, uint b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+////////////////////////////////////////////////////////////////////////////////
+// subtract
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise subtraction for the 2-component vector types (float2, int2,
+// uint2). Subtraction is not commutative, so the scalar-on-the-left overloads
+// compute (b - component), not (component - b). The -= forms modify their
+// first argument in place and return void.
+inline __host__ __device__ float2 operator-(float2 a, float2 b)
+{
+    return make_float2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(float2 &a, float2 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ float2 operator-(float2 a, float b)
+{
+    return make_float2(a.x - b, a.y - b);
+}
+// Scalar minus vector: each result component is b - a.<component>.
+inline __host__ __device__ float2 operator-(float b, float2 a)
+{
+    return make_float2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(float2 &a, float b)
+{
+    a.x -= b;
+    a.y -= b;
+}
+inline __host__ __device__ int2 operator-(int2 a, int2 b)
+{
+    return make_int2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(int2 &a, int2 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ int2 operator-(int2 a, int b)
+{
+    return make_int2(a.x - b, a.y - b);
+}
+inline __host__ __device__ int2 operator-(int b, int2 a)
+{
+    return make_int2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(int2 &a, int b)
+{
+    a.x -= b;
+    a.y -= b;
+}
+inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
+{
+    return make_uint2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ uint2 operator-(uint2 a, uint b)
+{
+    return make_uint2(a.x - b, a.y - b);
+}
+inline __host__ __device__ uint2 operator-(uint b, uint2 a)
+{
+    return make_uint2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(uint2 &a, uint b)
+{
+    a.x -= b;
+    a.y -= b;
+}
+// Component-wise subtraction for the 3-component vector types (float3, int3,
+// uint3). The scalar-on-the-left overloads compute (b - component). The -=
+// forms modify their first argument in place and return void.
+inline __host__ __device__ float3 operator-(float3 a, float3 b)
+{
+    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(float3 &a, float3 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ float3 operator-(float3 a, float b)
+{
+    return make_float3(a.x - b, a.y - b, a.z - b);
+}
+// Scalar minus vector: each result component is b - a.<component>.
+inline __host__ __device__ float3 operator-(float b, float3 a)
+{
+    return make_float3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(float3 &a, float b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+inline __host__ __device__ int3 operator-(int3 a, int3 b)
+{
+    return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(int3 &a, int3 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ int3 operator-(int3 a, int b)
+{
+    return make_int3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ int3 operator-(int b, int3 a)
+{
+    return make_int3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(int3 &a, int b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
+{
+    return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ uint3 operator-(uint3 a, uint b)
+{
+    return make_uint3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ uint3 operator-(uint b, uint3 a)
+{
+    return make_uint3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(uint3 &a, uint b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+// Component-wise subtraction for float4. The -= forms modify their first
+// argument in place and return void, so they cannot be chained.
+inline __host__ __device__ float4 operator-(float4 a, float4 b)
+{
+    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
+}
+inline __host__ __device__ void operator-=(float4 &a, float4 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ float4 operator-(float4 a, float b)
+{
+    return make_float4(a.x - b, a.y - b, a.z - b,  a.w - b);
+}
+// Scalar minus vector: each result component is b - a.<component>.
+// Added so float4 offers the same scalar-on-the-left form that float2,
+// float3, int4 and uint4 provide in this section; previously an expression
+// such as (1.0f - v) compiled for every vector type here except float4.
+inline __host__ __device__ float4 operator-(float b, float4 a)
+{
+    return make_float4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(float4 &a, float b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+// Component-wise subtraction for the integer 4-component types (int4, uint4).
+// The scalar-on-the-left overloads compute (b - component). The -= forms
+// modify their first argument in place and return void.
+inline __host__ __device__ int4 operator-(int4 a, int4 b)
+{
+    return make_int4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
+}
+inline __host__ __device__ void operator-=(int4 &a, int4 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ int4 operator-(int4 a, int b)
+{
+    return make_int4(a.x - b, a.y - b, a.z - b,  a.w - b);
+}
+inline __host__ __device__ int4 operator-(int b, int4 a)
+{
+    return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(int4 &a, int b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
+{
+    return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
+}
+inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ uint4 operator-(uint4 a, uint b)
+{
+    return make_uint4(a.x - b, a.y - b, a.z - b,  a.w - b);
+}
+inline __host__ __device__ uint4 operator-(uint b, uint4 a)
+{
+    return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(uint4 &a, uint b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+////////////////////////////////////////////////////////////////////////////////
+// multiply
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise multiplication for the 2-component vector types (float2,
+// int2, uint2). vector * vector multiplies matching components (it is not a
+// dot product); vector * scalar and scalar * vector scale every component.
+// The *= forms modify their first argument in place and return void.
+inline __host__ __device__ float2 operator*(float2 a, float2 b)
+{
+    return make_float2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(float2 &a, float2 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ float2 operator*(float2 a, float b)
+{
+    return make_float2(a.x * b, a.y * b);
+}
+inline __host__ __device__ float2 operator*(float b, float2 a)
+{
+    return make_float2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(float2 &a, float b)
+{
+    a.x *= b;
+    a.y *= b;
+}
+inline __host__ __device__ int2 operator*(int2 a, int2 b)
+{
+    return make_int2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(int2 &a, int2 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ int2 operator*(int2 a, int b)
+{
+    return make_int2(a.x * b, a.y * b);
+}
+inline __host__ __device__ int2 operator*(int b, int2 a)
+{
+    return make_int2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(int2 &a, int b)
+{
+    a.x *= b;
+    a.y *= b;
+}
+inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
+{
+    return make_uint2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ uint2 operator*(uint2 a, uint b)
+{
+    return make_uint2(a.x * b, a.y * b);
+}
+inline __host__ __device__ uint2 operator*(uint b, uint2 a)
+{
+    return make_uint2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(uint2 &a, uint b)
+{
+    a.x *= b;
+    a.y *= b;
+}
+// Component-wise multiplication for the 3-component vector types (float3,
+// int3, uint3). vector * vector multiplies matching components (it is neither
+// a dot nor a cross product); the scalar forms scale every component.
+// The *= forms modify their first argument in place and return void.
+inline __host__ __device__ float3 operator*(float3 a, float3 b)
+{
+    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(float3 &a, float3 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ float3 operator*(float3 a, float b)
+{
+    return make_float3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ float3 operator*(float b, float3 a)
+{
+    return make_float3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(float3 &a, float b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+inline __host__ __device__ int3 operator*(int3 a, int3 b)
+{
+    return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(int3 &a, int3 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ int3 operator*(int3 a, int b)
+{
+    return make_int3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ int3 operator*(int b, int3 a)
+{
+    return make_int3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(int3 &a, int b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
+{
+    return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ uint3 operator*(uint3 a, uint b)
+{
+    return make_uint3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ uint3 operator*(uint b, uint3 a)
+{
+    return make_uint3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(uint3 &a, uint b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+// Component-wise multiplication for the 4-component vector types (float4,
+// int4, uint4). vector * vector multiplies matching components; the scalar
+// forms scale every component. The *= forms modify their first argument in
+// place and return void.
+inline __host__ __device__ float4 operator*(float4 a, float4 b)
+{
+    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z,  a.w * b.w);
+}
+inline __host__ __device__ void operator*=(float4 &a, float4 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ float4 operator*(float4 a, float b)
+{
+    return make_float4(a.x * b, a.y * b, a.z * b,  a.w * b);
+}
+inline __host__ __device__ float4 operator*(float b, float4 a)
+{
+    return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(float4 &a, float b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+inline __host__ __device__ int4 operator*(int4 a, int4 b)
+{
+    return make_int4(a.x * b.x, a.y * b.y, a.z * b.z,  a.w * b.w);
+}
+inline __host__ __device__ void operator*=(int4 &a, int4 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ int4 operator*(int4 a, int b)
+{
+    return make_int4(a.x * b, a.y * b, a.z * b,  a.w * b);
+}
+inline __host__ __device__ int4 operator*(int b, int4 a)
+{
+    return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(int4 &a, int b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
+{
+    return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z,  a.w * b.w);
+}
+inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ uint4 operator*(uint4 a, uint b)
+{
+    return make_uint4(a.x * b, a.y * b, a.z * b,  a.w * b);
+}
+inline __host__ __device__ uint4 operator*(uint b, uint4 a)
+{
+    return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(uint4 &a, uint b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+////////////////////////////////////////////////////////////////////////////////
+// divide
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise division. Only the float vector types (float2, float3,
+// float4) get division overloads in this section; no integer vector forms are
+// defined here. No divisor is checked for zero. The scalar-on-the-left
+// overloads compute (b / component). The /= forms modify their first argument
+// in place and return void.
+inline __host__ __device__ float2 operator/(float2 a, float2 b)
+{
+    return make_float2(a.x / b.x, a.y / b.y);
+}
+inline __host__ __device__ void operator/=(float2 &a, float2 b)
+{
+    a.x /= b.x;
+    a.y /= b.y;
+}
+inline __host__ __device__ float2 operator/(float2 a, float b)
+{
+    return make_float2(a.x / b, a.y / b);
+}
+inline __host__ __device__ void operator/=(float2 &a, float b)
+{
+    a.x /= b;
+    a.y /= b;
+}
+// Scalar divided by vector: each result component is b / a.<component>.
+inline __host__ __device__ float2 operator/(float b, float2 a)
+{
+    return make_float2(b / a.x, b / a.y);
+}
+inline __host__ __device__ float3 operator/(float3 a, float3 b)
+{
+    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
+}
+inline __host__ __device__ void operator/=(float3 &a, float3 b)
+{
+    a.x /= b.x;
+    a.y /= b.y;
+    a.z /= b.z;
+}
+inline __host__ __device__ float3 operator/(float3 a, float b)
+{
+    return make_float3(a.x / b, a.y / b, a.z / b);
+}
+inline __host__ __device__ void operator/=(float3 &a, float b)
+{
+    a.x /= b;
+    a.y /= b;
+    a.z /= b;
+}
+inline __host__ __device__ float3 operator/(float b, float3 a)
+{
+    return make_float3(b / a.x, b / a.y, b / a.z);
+}
+inline __host__ __device__ float4 operator/(float4 a, float4 b)
+{
+    return make_float4(a.x / b.x, a.y / b.y, a.z / b.z,  a.w / b.w);
+}
+inline __host__ __device__ void operator/=(float4 &a, float4 b)
+{
+    a.x /= b.x;
+    a.y /= b.y;
+    a.z /= b.z;
+    a.w /= b.w;
+}
+inline __host__ __device__ float4 operator/(float4 a, float b)
+{
+    return make_float4(a.x / b, a.y / b, a.z / b,  a.w / b);
+}
+inline __host__ __device__ void operator/=(float4 &a, float b)
+{
+    a.x /= b;
+    a.y /= b;
+    a.z /= b;
+    a.w /= b;
+}
+inline __host__ __device__ float4 operator/(float b, float4 a)
+{
+    return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);
+}
+////////////////////////////////////////////////////////////////////////////////
+// min
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise minimum. The float vector overloads are named fminf and call
+// the scalar fminf on each component; the int/uint vector overloads are named
+// min and call the scalar min on each component (scalar versions are defined
+// outside this section).
+inline  __host__ __device__ float2 fminf(float2 a, float2 b)
+{
+    return make_float2(fminf(a.x,b.x), fminf(a.y,b.y));
+}
+inline __host__ __device__ float3 fminf(float3 a, float3 b)
+{
+    return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
+}
+inline  __host__ __device__ float4 fminf(float4 a, float4 b)
+{
+    return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
+}
+inline __host__ __device__ int2 min(int2 a, int2 b)
+{
+    return make_int2(min(a.x,b.x), min(a.y,b.y));
+}
+inline __host__ __device__ int3 min(int3 a, int3 b)
+{
+    return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
+}
+inline __host__ __device__ int4 min(int4 a, int4 b)
+{
+    return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
+}
+inline __host__ __device__ uint2 min(uint2 a, uint2 b)
+{
+    return make_uint2(min(a.x,b.x), min(a.y,b.y));
+}
+inline __host__ __device__ uint3 min(uint3 a, uint3 b)
+{
+    return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
+}
+inline __host__ __device__ uint4 min(uint4 a, uint4 b)
+{
+    return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// max
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise maximum. The float vector overloads are named fmaxf and call
+// the scalar fmaxf on each component; the int/uint vector overloads are named
+// max and call the scalar max on each component (scalar versions are defined
+// outside this section).
+inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
+{
+    return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y));
+}
+inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
+{
+    return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
+}
+inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
+{
+    return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
+}
+inline __host__ __device__ int2 max(int2 a, int2 b)
+{
+    return make_int2(max(a.x,b.x), max(a.y,b.y));
+}
+inline __host__ __device__ int3 max(int3 a, int3 b)
+{
+    return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
+}
+inline __host__ __device__ int4 max(int4 a, int4 b)
+{
+    return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
+}
+inline __host__ __device__ uint2 max(uint2 a, uint2 b)
+{
+    return make_uint2(max(a.x,b.x), max(a.y,b.y));
+}
+inline __host__ __device__ uint3 max(uint3 a, uint3 b)
+{
+    return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
+}
+inline __host__ __device__ uint4 max(uint4 a, uint4 b)
+{
+    return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// lerp
+// - linear interpolation between a and b, based on value t in [0, 1] range
+////////////////////////////////////////////////////////////////////////////////
+// Returns a + t*(b-a): a when t == 0 and b when t == 1 (up to rounding).
+// t is not clamped, so values outside [0, 1] extrapolate along the same line.
+inline __device__ __host__ float lerp(float a, float b, float t)
+{
+    return a + t*(b-a);
+}
+// Vector overloads: the same formula, with a single scalar t applied to every
+// component through the vector operators defined earlier in this header.
+inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
+{
+    return a + t*(b-a);
+}
+inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
+{
+    return a + t*(b-a);
+}
+inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
+{
+    return a + t*(b-a);
+}
+////////////////////////////////////////////////////////////////////////////////
+// clamp
+// - clamp the value v to be in the range [a, b]
+////////////////////////////////////////////////////////////////////////////////
+// Scalar clamp: f is the value, a the lower bound, b the upper bound.
+// Computed as max(a, min(f, b)). The bounds are not validated: if a > b the
+// outer max wins and the result is a.
+inline __device__ __host__ float clamp(float f, float a, float b)
+{
+    return fmaxf(a, fminf(f, b));
+}
+inline __device__ __host__ int clamp(int f, int a, int b)
+{
+    return max(a, min(f, b));
+}
+inline __device__ __host__ uint clamp(uint f, uint a, uint b)
+{
+    return max(a, min(f, b));
+}
+// Vector clamp overloads. Each vector type has two forms:
+//   clamp(v, scalar a, scalar b) - every component is clamped to the same [a, b]
+//   clamp(v, vector a, vector b) - component i is clamped to [a.i, b.i]
+// All of them delegate to the scalar clamp component by component.
+inline __device__ __host__ float2 clamp(float2 v, float a, float b)
+{
+    return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
+}
+inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
+{
+    return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
+}
+inline __device__ __host__ float3 clamp(float3 v, float a, float b)
+{
+    return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
+}
+inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
+{
+    return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
+}
+inline __device__ __host__ float4 clamp(float4 v, float a, float b)
+{
+    return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
+}
+inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
+{
+    return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
+}
+inline __device__ __host__ int2 clamp(int2 v, int a, int b)
+{
+    return make_int2(clamp(v.x, a, b), clamp(v.y, a, b));
+}
+inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
+{
+    return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
+}
+inline __device__ __host__ int3 clamp(int3 v, int a, int b)
+{
+    return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
+}
+inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
+{
+    return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
+}
+inline __device__ __host__ int4 clamp(int4 v, int a, int b)
+{
+    return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
+}
+inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
+{
+    return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
+}
+inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
+{
+    return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b));
+}
+inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
+{
+    return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
+}
+inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
+{
+    return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
+}
+inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
+{
+    return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
+}
+inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
+{
+    return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
+}
+inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
+{
+    return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// dot product
+////////////////////////////////////////////////////////////////////////////////
+// Dot product: the sum of the products of matching components. The result has
+// the scalar type of the inputs (float, int or uint); the integer overloads
+// accumulate in that integer type without widening.
+inline __host__ __device__ float dot(float2 a, float2 b)
+{
+    return a.x * b.x + a.y * b.y;
+}
+inline __host__ __device__ float dot(float3 a, float3 b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+inline __host__ __device__ float dot(float4 a, float4 b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+}
+inline __host__ __device__ int dot(int2 a, int2 b)
+{
+    return a.x * b.x + a.y * b.y;
+}
+inline __host__ __device__ int dot(int3 a, int3 b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+inline __host__ __device__ int dot(int4 a, int4 b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+}
+inline __host__ __device__ uint dot(uint2 a, uint2 b)
+{
+    return a.x * b.x + a.y * b.y;
+}
+inline __host__ __device__ uint dot(uint3 a, uint3 b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+inline __host__ __device__ uint dot(uint4 a, uint4 b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+}
+////////////////////////////////////////////////////////////////////////////////
+// length
+////////////////////////////////////////////////////////////////////////////////
+// Euclidean length of a float vector: the square root of dot(v, v).
+inline __host__ __device__ float length(float2 v)
+{
+    return sqrtf(dot(v, v));
+}
+inline __host__ __device__ float length(float3 v)
+{
+    return sqrtf(dot(v, v));
+}
+inline __host__ __device__ float length(float4 v)
+{
+    return sqrtf(dot(v, v));
+}
+////////////////////////////////////////////////////////////////////////////////
+// normalize
+////////////////////////////////////////////////////////////////////////////////
+// Scales v by rsqrtf(dot(v, v)), i.e. by the reciprocal of its length.
+// There is no guard for a zero-length input: dot(v, v) == 0 is passed straight
+// to rsqrtf, so callers must make sure v is non-zero.
+inline __host__ __device__ float2 normalize(float2 v)
+{
+    float invLen = rsqrtf(dot(v, v));
+    return v * invLen;
+}
+inline __host__ __device__ float3 normalize(float3 v)
+{
+    float invLen = rsqrtf(dot(v, v));
+    return v * invLen;
+}
+inline __host__ __device__ float4 normalize(float4 v)
+{
+    float invLen = rsqrtf(dot(v, v));
+    return v * invLen;
+}
+////////////////////////////////////////////////////////////////////////////////
+// floor
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise floor: applies the scalar floorf to every component.
+inline __host__ __device__ float2 floorf(float2 v)
+{
+    return make_float2(floorf(v.x), floorf(v.y));
+}
+inline __host__ __device__ float3 floorf(float3 v)
+{
+    return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));
+}
+inline __host__ __device__ float4 floorf(float4 v)
+{
+    return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// frac - returns the fractional portion of a scalar or each vector component
+////////////////////////////////////////////////////////////////////////////////
+// Fractional part defined as v - floorf(v). Because floorf rounds toward
+// negative infinity, negative inputs map to a non-negative result
+// (e.g. -0.25f gives 0.75f) rather than keeping the sign of v.
+inline __host__ __device__ float fracf(float v)
+{
+    return v - floorf(v);
+}
+// Vector overloads apply the scalar fracf to every component.
+inline __host__ __device__ float2 fracf(float2 v)
+{
+    return make_float2(fracf(v.x), fracf(v.y));
+}
+inline __host__ __device__ float3 fracf(float3 v)
+{
+    return make_float3(fracf(v.x), fracf(v.y), fracf(v.z));
+}
+inline __host__ __device__ float4 fracf(float4 v)
+{
+    return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// fmod
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise fmodf: component i of the result is fmodf(a.i, b.i).
+inline __host__ __device__ float2 fmodf(float2 a, float2 b)
+{
+    return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y));
+}
+inline __host__ __device__ float3 fmodf(float3 a, float3 b)
+{
+    return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z));
+}
+inline __host__ __device__ float4 fmodf(float4 a, float4 b)
+{
+    return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// absolute value
+////////////////////////////////////////////////////////////////////////////////
+// Component-wise absolute value. The float vector overloads are named fabs and
+// call fabs on each component; the int vector overloads are named abs and call
+// abs on each component. No uint overloads are defined in this section.
+inline __host__ __device__ float2 fabs(float2 v)
+{
+    return make_float2(fabs(v.x), fabs(v.y));
+}
+inline __host__ __device__ float3 fabs(float3 v)
+{
+    return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
+}
+inline __host__ __device__ float4 fabs(float4 v)
+{
+    return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
+}
+inline __host__ __device__ int2 abs(int2 v)
+{
+    return make_int2(abs(v.x), abs(v.y));
+}
+inline __host__ __device__ int3 abs(int3 v)
+{
+    return make_int3(abs(v.x), abs(v.y), abs(v.z));
+}
+inline __host__ __device__ int4 abs(int4 v)
+{
+    return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w));
+}
+////////////////////////////////////////////////////////////////////////////////
+// reflect
+// - returns reflection of incident ray I around surface normal N
+// - N should be normalized, reflected vector's length is equal to length of I
+////////////////////////////////////////////////////////////////////////////////
+// Computes i - 2 * n * dot(n, i). n is used as given (it is not normalized
+// here), so the caller must pass a unit-length normal.
+inline __host__ __device__ float3 reflect(float3 i, float3 n)
+{
+    return i - 2.0f * n * dot(n,i);
+}
+////////////////////////////////////////////////////////////////////////////////
+// cross product
+////////////////////////////////////////////////////////////////////////////////
+// 3-D cross product a x b. Swapping the arguments negates the result.
+inline __host__ __device__ float3 cross(float3 a, float3 b)
+{
+    return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
+}
+////////////////////////////////////////////////////////////////////////////////
+// smoothstep
+// - returns 0 if x < a
+// - returns 1 if x > b
+// - otherwise returns smooth interpolation between 0 and 1 based on x
+////////////////////////////////////////////////////////////////////////////////
+// a and b are the lower and upper edges, x is the input. x is first mapped to
+// y = clamp((x - a) / (b - a), 0, 1), then shaped with y*y*(3 - 2*y).
+// (b - a) is not checked for zero, so callers must pass a != b.
+inline __device__ __host__ float smoothstep(float a, float b, float x)
+{
+    float y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(3.0f - (2.0f*y)));
+}
+// Vector overloads: the same formula applied per component, with per-component
+// edges a and b. The constants 3 and 2 are expanded with the single-argument
+// make_floatN helpers (defined outside this section).
+inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
+{
+    float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y)));
+}
+inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
+{
+    float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y)));
+}
+inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
+{
+    float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y)));
+}
+#endif

utils/io_utils.py ADDED Viewed

	@@ -0,0 +1,473 @@

+import json, os, sys
+import os.path as osp
+from typing import List, Union, Tuple, Dict
+from pathlib import Path
+import cv2
+import numpy as np
+from imageio import imread, imwrite
+import pickle
+import pycocotools.mask as maskUtils
+from einops import rearrange
+from tqdm import tqdm
+from PIL import Image
+import io
+import requests
+import traceback
+import base64
+import time
+NP_BOOL_TYPES = (np.bool_, np.bool8)
+NP_FLOAT_TYPES = (np.float_, np.float16, np.float32, np.float64)
+NP_INT_TYPES = (np.int_, np.int8, np.int16, np.int32, np.int64, np.uint, np.uint8, np.uint16, np.uint32, np.uint64)
+class NumpyEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        elif isinstance(obj, np.ScalarType):
+            if isinstance(obj, NP_BOOL_TYPES):
+                return bool(obj)
+            elif isinstance(obj, NP_FLOAT_TYPES):
+                return float(obj)
+            elif isinstance(obj, NP_INT_TYPES):
+                return int(obj)
+        return json.JSONEncoder.default(self, obj)
+def json2dict(json_path: str):
+    with open(json_path, 'r', encoding='utf8') as f:
+        metadata = json.loads(f.read())
+    return metadata
+def dict2json(adict: dict, json_path: str):
+    with open(json_path, "w", encoding="utf-8") as f:
+        f.write(json.dumps(adict, ensure_ascii=False, cls=NumpyEncoder))
+def dict2pickle(dumped_path: str, tgt_dict: dict):
+    with open(dumped_path, "wb") as f:
+        pickle.dump(tgt_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
+def pickle2dict(pkl_path: str) -> Dict:
+    with open(pkl_path, "rb") as f:
+        dumped_data = pickle.load(f)
+    return dumped_data
+def get_all_dirs(root_p: str) -> List[str]:
+    alldir = os.listdir(root_p)
+    dirlist = []
+    for dirp in alldir:
+        dirp = osp.join(root_p, dirp)
+        if osp.isdir(dirp):
+            dirlist.append(dirp)
+    return dirlist
+def read_filelist(filelistp: str):
+    with open(filelistp, 'r', encoding='utf8') as f:
+        lines = f.readlines()
+    if len(lines) > 0 and lines[-1].strip() == '':
+        lines = lines[:-1]
+    return lines
+VIDEO_EXTS = {'.flv', '.mp4', '.mkv', '.ts', '.mov', 'mpeg'}
+def get_all_videos(video_dir: str, video_exts=VIDEO_EXTS, abs_path=False) -> List[str]:
+    filelist = os.listdir(video_dir)
+    vlist = []
+    for f in filelist:
+        if Path(f).suffix in video_exts:
+            if abs_path:
+                vlist.append(osp.join(video_dir, f))
+            else:
+                vlist.append(f)
+    return vlist
+IMG_EXT = {'.bmp', '.jpg', '.png', '.jpeg'}
+def find_all_imgs(img_dir, abs_path=False):
+    imglist = []
+    dir_list = os.listdir(img_dir)
+    for filename in dir_list:
+        file_suffix = Path(filename).suffix
+        if file_suffix.lower() not in IMG_EXT:
+            continue
+        if abs_path:
+            imglist.append(osp.join(img_dir, filename))
+        else:
+            imglist.append(filename)
+    return imglist
+def find_all_files_recursive(tgt_dir: Union[List, str], ext, exclude_dirs={}):
+    if isinstance(tgt_dir, str):
+        tgt_dir = [tgt_dir]
+    filelst = []
+    for d in tgt_dir:
+        for root, _, files in os.walk(d):
+            if osp.basename(root) in exclude_dirs:
+                continue
+            for f in files:
+                if Path(f).suffix.lower() in ext:
+                    filelst.append(osp.join(root, f))
+    return filelst
+def danbooruid2relpath(id_str: str, file_ext='.jpg'):
+    if not isinstance(id_str, str):
+        id_str = str(id_str)
+    return id_str[-3:].zfill(4) + '/' + id_str  + file_ext
+def get_template_histvq(template: np.ndarray) -> Tuple[List[np.ndarray]]:
+    len_shape = len(template.shape)
+    num_c = 3
+    mask = None
+    if len_shape == 2:
+        num_c = 1
+    elif len_shape == 3 and template.shape[-1] == 4:
+        mask = np.where(template[..., -1])
+        template = template[..., :num_c][mask]
+    values, quantiles = [], []
+    for ii in range(num_c):
+        v, c = np.unique(template[..., ii].ravel(), return_counts=True)
+        q = np.cumsum(c).astype(np.float64)
+        if len(q) < 1:
+            return None, None
+        q /= q[-1]
+        values.append(v)
+        quantiles.append(q)
+    return values, quantiles
+def inplace_hist_matching(img: np.ndarray, tv: List[np.ndarray], tq: List[np.ndarray]) -> None:
+    len_shape = len(img.shape)
+    num_c = 3
+    mask = None
+    tgtimg = img
+    if len_shape == 2:
+        num_c = 1
+    elif len_shape == 3 and img.shape[-1] == 4:
+        mask = np.where(img[..., -1])
+        tgtimg = img[..., :num_c][mask]
+    im_h, im_w = img.shape[:2]
+    oldtype = img.dtype
+    for ii in range(num_c):
+        _, bin_idx, s_counts = np.unique(tgtimg[..., ii].ravel(), return_inverse=True,
+                                                return_counts=True)
+        s_quantiles = np.cumsum(s_counts).astype(np.float64)
+        if len(s_quantiles) == 0:
+            return
+        s_quantiles /= s_quantiles[-1]
+        interp_t_values = np.interp(s_quantiles, tq[ii], tv[ii]).astype(oldtype)
+        if mask is not None:
+            img[..., ii][mask] = interp_t_values[bin_idx]
+        else:
+            img[..., ii] = interp_t_values[bin_idx].reshape((im_h, im_w))
+            # try:
+            #     img[..., ii] = interp_t_values[bin_idx].reshape((im_h, im_w))
+            # except:
+            #     LOGGER.error('##################### sth goes wrong')
+            #     cv2.imshow('img', img)
+            #     cv2.waitKey(0)
+def fgbg_hist_matching(fg_list: List, bg: np.ndarray, min_tq_num=128):
+    btv, btq = get_template_histvq(bg)
+    ftv, ftq = get_template_histvq(fg_list[0]['image'])
+    num_fg = len(fg_list)
+    idx_matched = -1
+    if num_fg > 1:
+        _ftv, _ftq = get_template_histvq(fg_list[0]['image'])
+        if _ftq is not None and ftq is not None:
+            if len(_ftq[0]) > len(ftq[0]):
+                idx_matched = num_fg - 1
+                ftv, ftq = _ftv, _ftq
+            else:
+                idx_matched = 0
+    if btq is not None and ftq is not None:
+        if len(btq[0]) > len(ftq[0]):
+            tv, tq = btv, btq
+            idx_matched = -1
+        else:
+            tv, tq = ftv, ftq
+            if len(tq[0]) > min_tq_num:
+                inplace_hist_matching(bg, tv, tq)
+        if len(tq[0]) > min_tq_num:
+            for ii, fg_dict in enumerate(fg_list):
+                fg = fg_dict['image']
+                if ii != idx_matched and len(tq[0]) > min_tq_num:
+                    inplace_hist_matching(fg, tv, tq)
+def imread_nogrey_rgb(imp: str) -> np.ndarray:
+    img: np.ndarray = imread(imp)
+    c = 1
+    if len(img.shape) == 3:
+        c = img.shape[-1]
+    if c == 1:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+    if c == 4:
+        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
+    return img
+def square_pad_resize(img: np.ndarray, tgt_size: int, pad_value: Tuple = (114, 114, 114)):
+    h, w = img.shape[:2]
+    pad_h, pad_w = 0, 0
+    # make square image
+    if w < h:
+        pad_w = h - w
+        w += pad_w
+    elif h < w:
+        pad_h = w - h
+        h += pad_h
+    pad_size = tgt_size - h
+    if pad_size > 0:
+        pad_h += pad_size
+        pad_w += pad_size
+    if pad_h > 0 or pad_w > 0:
+        img = cv2.copyMakeBorder(img, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT, value=pad_value)
+    down_scale_ratio = tgt_size / img.shape[0]
+    assert down_scale_ratio <= 1
+    if down_scale_ratio < 1:
+        img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_AREA)
+    return img, down_scale_ratio, pad_h, pad_w
+def scaledown_maxsize(img: np.ndarray, max_size: int, divisior: int = None):
+    im_h, im_w = img.shape[:2]
+    ori_h, ori_w = img.shape[:2]
+    resize_ratio = max_size / max(im_h, im_w)
+    if resize_ratio < 1:
+        if im_h > im_w:
+            im_h = max_size
+            im_w = max(1, int(round(im_w * resize_ratio)))
+        else:
+            im_w = max_size
+            im_h = max(1, int(round(im_h * resize_ratio)))
+    if divisior is not None:
+        im_w = int(np.ceil(im_w / divisior) * divisior)
+        im_h = int(np.ceil(im_h / divisior) * divisior)
+    if im_w != ori_w or im_h != ori_h:
+       img = cv2.resize(img, (im_w, im_h), interpolation=cv2.INTER_LINEAR)
+    return img
+def resize_pad(img: np.ndarray, tgt_size: int, pad_value: Tuple = (0, 0, 0)):
+    # downscale to tgt_size and pad to square
+    img = scaledown_maxsize(img, tgt_size)
+    padl, padr, padt, padb = 0, 0, 0, 0
+    h, w = img.shape[:2]
+    # padt = (tgt_size - h) // 2
+    # padb = tgt_size - h - padt
+    # padl = (tgt_size - w) // 2
+    # padr = tgt_size - w - padl
+    padb = tgt_size - h
+    padr = tgt_size - w
+    if padt + padb + padl + padr > 0:
+        img = cv2.copyMakeBorder(img, padt, padb, padl, padr, cv2.BORDER_CONSTANT, value=pad_value)
+    return img, (padt, padb, padl, padr)
+def resize_pad2divisior(img: np.ndarray, tgt_size: int, divisior: int = 64, pad_value: Tuple = (0, 0, 0)):
+    img = scaledown_maxsize(img, tgt_size)
+    img, (pad_h, pad_w) = pad2divisior(img, divisior, pad_value)
+    return img, (pad_h, pad_w)
+def img2grey(img: Union[np.ndarray, str], is_rgb: bool = False) -> np.ndarray:
+    if isinstance(img, np.ndarray):
+        if len(img.shape) == 3:
+            if img.shape[-1] != 1:
+                if is_rgb:
+                    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+                else:
+                    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+            else:
+                img = img[..., 0]
+        return img
+    elif isinstance(img, str):
+        return cv2.imread(img, cv2.IMREAD_GRAYSCALE)
+    else:
+        raise NotImplementedError
+def pad2divisior(img: np.ndarray, divisior: int, value = (0, 0, 0)) -> np.ndarray:
+    im_h, im_w = img.shape[:2]
+    pad_h = int(np.ceil(im_h / divisior)) * divisior - im_h
+    pad_w = int(np.ceil(im_w / divisior)) * divisior - im_w
+    if pad_h != 0 or pad_w != 0:
+        img = cv2.copyMakeBorder(img, 0, pad_h, 0, pad_w, value=value, borderType=cv2.BORDER_CONSTANT)
+    return img, (pad_h, pad_w)
+def mask2rle(mask: np.ndarray, decode_for_json: bool = True) -> Dict:
+    mask_rle = maskUtils.encode(np.array(
+                        mask[..., np.newaxis] > 0, order='F',
+                        dtype='uint8'))[0]
+    if decode_for_json:
+        mask_rle['counts'] = mask_rle['counts'].decode()
+    return mask_rle
+def bbox2xyxy(box) -> Tuple[int]:
+    x1, y1 = box[0], box[1]
+    return x1, y1, x1+box[2], y1+box[3]
+def bbox_overlap_area(abox, boxb) -> int:
+    ax1, ay1, ax2, ay2 = bbox2xyxy(abox)
+    bx1, by1, bx2, by2 = bbox2xyxy(boxb)
+    ix = min(ax2, bx2) - max(ax1, bx1)
+    iy = min(ay2, by2) - max(ay1, by1)
+    if ix > 0 and iy > 0:
+        return ix * iy
+    else:
+        return 0
+def bbox_overlap_xy(abox, boxb) -> Tuple[int]:
+    ax1, ay1, ax2, ay2 = bbox2xyxy(abox)
+    bx1, by1, bx2, by2 = bbox2xyxy(boxb)
+    ix = min(ax2, bx2) - max(ax1, bx1)
+    iy = min(ay2, by2) - max(ay1, by1)
+    return ix, iy
+def xyxy_overlap_area(axyxy, bxyxy) -> int:
+    ax1, ay1, ax2, ay2 = axyxy
+    bx1, by1, bx2, by2 = bxyxy
+    ix = min(ax2, bx2) - max(ax1, bx1)
+    iy = min(ay2, by2) - max(ay1, by1)
+    if ix > 0 and iy > 0:
+        return ix * iy
+    else:
+        return 0
+DIRNAME2TAG = {'rezero': 're:zero'}
+def dirname2charactername(dirname, start=6):
+    cname = dirname[start:]
+    for k, v in DIRNAME2TAG.items():
+        cname = cname.replace(k, v)
+    return cname
+def imglist2grid(imglist: np.ndarray, grid_size: int = 384, col=None) -> np.ndarray:
+    sqimlist = []
+    for img in imglist:
+        sqimlist.append(square_pad_resize(img, grid_size)[0])
+    nimg = len(imglist)
+    if nimg == 0:
+        return None
+    padn = 0
+    if col is None:
+        if nimg > 5:
+            row = int(np.round(np.sqrt(nimg)))
+            col = int(np.ceil(nimg / row))
+        else:
+            col = nimg
+    padn = int(np.ceil(nimg / col) * col) - nimg
+    if padn != 0:
+        padimg = np.zeros_like(sqimlist[0])
+        for _ in range(padn):
+            sqimlist.append(padimg)
+    return rearrange(sqimlist, '(row col) h w c -> (row h) (col w) c', col=col)
+def write_jsonlines(filep: str, dict_lst: List[str], progress_bar: bool = True):
+    with open(filep, 'w') as out:
+        if progress_bar:
+            lst = tqdm(dict_lst)
+        else:
+            lst = dict_lst
+        for ddict in lst:
+            jout = json.dumps(ddict) + '\n'
+            out.write(jout)
+def read_jsonlines(filep: str):
+    with open(filep, 'r', encoding='utf8') as f:
+        result = [json.loads(jline) for jline in f.read().splitlines()]
+    return result
+def _b64encode(x: bytes) -> str:
+    return base64.b64encode(x).decode("utf-8")
+def img2b64(img):
+    """
+    Convert a PIL image to a base64-encoded string.
+    """
+    if isinstance(img, np.ndarray):
+        img = Image.fromarray(img)
+    buffered = io.BytesIO()
+    img.save(buffered, format='PNG')
+    return _b64encode(buffered.getvalue())
+def save_encoded_image(b64_image: str, output_path: str):
+    with open(output_path, "wb") as image_file:
+        image_file.write(base64.b64decode(b64_image))
+def submit_request(url, data, exist_on_exception=True, auth=None, wait_time = 30):
+    response = None
+    try:
+        while True:
+            try:
+                response = requests.post(url, data=data, auth=auth)
+                response.raise_for_status()
+                break
+            except Exception as e:
+                if wait_time > 0:
+                    print(traceback.format_exc(), file=sys.stderr)
+                    print(f'sleep {wait_time} sec...')
+                    time.sleep(wait_time)
+                    continue
+                else:
+                    raise e
+    except Exception as e:
+        print(traceback.format_exc(), file=sys.stderr)
+        if response is not None:
+            print('response content: ' + response.text)
+        if exist_on_exception:
+            exit()
+    return response
+# def resize_image(input_image, resolution):
+#     H, W = input_image.shape[:2]
+#     k = float(min(resolution)) / min(H, W)
+#     img = cv2.resize(input_image, resolution, interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
+#     return img

utils/logger.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import logging
+import os.path as osp
+from termcolor import colored
+def set_logging(name=None, verbose=True):
+    for handler in logging.root.handlers[:]:
+        logging.root.removeHandler(handler)
+    # Sets level and returns logger
+    # rank = int(os.getenv('RANK', -1))  # rank in world for Multi-GPU trainings
+    fmt = (
+        # colored("[%(name)s]", "magenta", attrs=["bold"])
+        colored("[%(asctime)s]", "blue")
+        + colored("%(levelname)s:", "green")
+        + colored("%(message)s", "white")
+    )
+    logging.basicConfig(format=fmt, level=logging.INFO if verbose else logging.WARNING)
+    return logging.getLogger(name)
+LOGGER = set_logging(__name__)  # define globally (used in train.py, val.py, detect.py, etc.)

utils/mmdet_custom_hooks.py ADDED Viewed

	@@ -0,0 +1,223 @@

+from mmengine.fileio import FileClient
+from mmengine.dist import master_only
+from einops import rearrange
+import torch
+import mmcv
+import numpy as np
+import os.path as osp
+import cv2
+from typing import Optional, Sequence
+import torch.nn as nn
+from mmdet.apis import inference_detector
+from mmcv.transforms import Compose
+from mmdet.engine import DetVisualizationHook
+from mmdet.registry import HOOKS
+from mmdet.structures import DetDataSample
+from utils.io_utils import find_all_imgs, square_pad_resize, imglist2grid
+def inference_detector(
+    model: nn.Module,
+    imgs,
+    test_pipeline
+):
+    if isinstance(imgs, (list, tuple)):
+        is_batch = True
+    else:
+        imgs = [imgs]
+        is_batch = False
+    if len(imgs) == 0:
+        return []
+    test_pipeline = test_pipeline.copy()
+    if isinstance(imgs[0], np.ndarray):
+        # Calling this method across libraries will result
+        # in module unregistered error if not prefixed with mmdet.
+        test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'
+    test_pipeline = Compose(test_pipeline)
+    result_list = []
+    for img in imgs:
+        # prepare data
+        if isinstance(img, np.ndarray):
+            # TODO: remove img_id.
+            data_ = dict(img=img, img_id=0)
+        else:
+            # TODO: remove img_id.
+            data_ = dict(img_path=img, img_id=0)
+        # build the data pipeline
+        data_ = test_pipeline(data_)
+        data_['inputs'] = [data_['inputs']]
+        data_['data_samples'] = [data_['data_samples']]
+        # forward the model
+        with torch.no_grad():
+            results = model.test_step(data_)[0]
+        result_list.append(results)
+    if not is_batch:
+        return result_list[0]
+    else:
+        return result_list
+@HOOKS.register_module()
+class InstanceSegVisualizationHook(DetVisualizationHook):
+    def __init__(self, visualize_samples: str = '',
+                 read_rgb: bool = False,
+                 draw: bool = False,
+                 interval: int = 50,
+                 score_thr: float = 0.3,
+                 show: bool = False,
+                 wait_time: float = 0.,
+                 test_out_dir: Optional[str] = None,
+                 file_client_args: dict = dict(backend='disk')):
+        super().__init__(draw, interval, score_thr, show, wait_time, test_out_dir, file_client_args)
+        self.vis_samples = []
+        if osp.exists(visualize_samples):
+            self.channel_order = channel_order = 'rgb' if read_rgb else 'bgr'
+            samples = find_all_imgs(visualize_samples, abs_path=True)
+            for imgp in samples:
+                img = mmcv.imread(imgp, channel_order=channel_order)
+                img, _, _, _ = square_pad_resize(img, 640)
+                self.vis_samples.append(img)
+    def before_val(self, runner) -> None:
+        total_curr_iter = runner.iter
+        self._visualize_data(total_curr_iter, runner)
+        return super().before_val(runner)
+    # def after_val_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
+    #                    outputs: Sequence[DetDataSample]) -> None:
+    #     """Run after every ``self.interval`` validation iterations.
+    #     Args:
+    #         runner (:obj:`Runner`): The runner of the validation process.
+    #         batch_idx (int): The index of the current batch in the val loop.
+    #         data_batch (dict): Data from dataloader.
+    #         outputs (Sequence[:obj:`DetDataSample`]]): A batch of data samples
+    #             that contain annotations and predictions.
+    #     """
+    #     # if self.draw is False:
+    #     #     return
+    #     if self.file_client is None:
+    #         self.file_client = FileClient(**self.file_client_args)
+    #     # There is no guarantee that the same batch of images
+    #     # is visualized for each evaluation.
+    #     total_curr_iter = runner.iter + batch_idx
+    #     # # Visualize only the first data
+    #     # img_path = outputs[0].img_path
+    #     # img_bytes = self.file_client.get(img_path)
+    #     # img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+    #     if total_curr_iter % self.interval == 0 and self.vis_samples:
+    #         self._visualize_data(total_curr_iter, runner)
+    @master_only
+    def _visualize_data(self, total_curr_iter, runner):
+        tgt_size = 384
+        runner.model.eval()
+        outputs = inference_detector(runner.model, self.vis_samples, test_pipeline=runner.cfg.test_pipeline)
+        vis_results = []
+        for img, output in zip(self.vis_samples, outputs):
+            vis_img = self.add_datasample(
+                    'val_img',
+                    img,
+                    data_sample=output,
+                    show=self.show,
+                    wait_time=self.wait_time,
+                    pred_score_thr=self.score_thr,
+                    draw_gt=False,
+                    step=total_curr_iter)
+            vis_results.append(cv2.resize(vis_img, (tgt_size, tgt_size), interpolation=cv2.INTER_AREA))
+        drawn_img = imglist2grid(vis_results, tgt_size)
+        if drawn_img is None:
+            return
+        drawn_img = cv2.cvtColor(drawn_img, cv2.COLOR_BGR2RGB)
+        visualizer = self._visualizer
+        visualizer.set_image(drawn_img)
+        visualizer.add_image('val_img', drawn_img, total_curr_iter)
+    @master_only
+    def add_datasample(
+            self,
+            name: str,
+            image: np.ndarray,
+            data_sample: Optional['DetDataSample'] = None,
+            draw_gt: bool = True,
+            draw_pred: bool = True,
+            show: bool = False,
+            wait_time: float = 0,
+            # TODO: Supported in mmengine's Viusalizer.
+            out_file: Optional[str] = None,
+            pred_score_thr: float = 0.3,
+            step: int = 0) -> np.ndarray:
+        image = image.clip(0, 255).astype(np.uint8)
+        visualizer = self._visualizer
+        classes = visualizer.dataset_meta.get('classes', None)
+        palette = visualizer.dataset_meta.get('palette', None)
+        gt_img_data = None
+        pred_img_data = None
+        if data_sample is not None:
+            data_sample = data_sample.cpu()
+        if draw_gt and data_sample is not None:
+            gt_img_data = image
+            if 'gt_instances' in data_sample:
+                gt_img_data = visualizer._draw_instances(image,
+                                                   data_sample.gt_instances,
+                                                   classes, palette)
+            if 'gt_panoptic_seg' in data_sample:
+                assert classes is not None, 'class information is ' \
+                                            'not provided when ' \
+                                            'visualizing panoptic ' \
+                                            'segmentation results.'
+                gt_img_data = visualizer._draw_panoptic_seg(
+                    gt_img_data, data_sample.gt_panoptic_seg, classes)
+        if draw_pred and data_sample is not None:
+            pred_img_data = image
+            if 'pred_instances' in data_sample:
+                pred_instances = data_sample.pred_instances
+                pred_instances = pred_instances[
+                    pred_instances.scores > pred_score_thr]
+                pred_img_data = visualizer._draw_instances(image, pred_instances,
+                                                     classes, palette)
+            if 'pred_panoptic_seg' in data_sample:
+                assert classes is not None, 'class information is ' \
+                                            'not provided when ' \
+                                            'visualizing panoptic ' \
+                                            'segmentation results.'
+                pred_img_data = visualizer._draw_panoptic_seg(
+                    pred_img_data, data_sample.pred_panoptic_seg.numpy(),
+                    classes)
+        if gt_img_data is not None and pred_img_data is not None:
+            drawn_img = np.concatenate((gt_img_data, pred_img_data), axis=1)
+        elif gt_img_data is not None:
+            drawn_img = gt_img_data
+        elif pred_img_data is not None:
+            drawn_img = pred_img_data
+        else:
+            # Display the original image directly if nothing is drawn.
+            drawn_img = image
+        return drawn_img