Spaces:

Stable-X
/

normal-estimation-arena

Running on Zero

App Files Files Community

Stable-X committed on Jun 12, 2024

Commit

53a077e

1 Parent(s): a18753a

Fix environment dependency

Browse files

Files changed (15) hide show

app.py +1 -2
geffnet/__init__.py +5 -0
geffnet/activations/__init__.py +137 -0
geffnet/activations/activations.py +102 -0
geffnet/activations/activations_jit.py +79 -0
geffnet/activations/activations_me.py +170 -0
geffnet/config.py +123 -0
geffnet/conv2d_layers.py +304 -0
geffnet/efficientnet_builder.py +683 -0
geffnet/gen_efficientnet.py +1450 -0
geffnet/helpers.py +71 -0
geffnet/mobilenetv3.py +364 -0
geffnet/model_factory.py +27 -0
geffnet/version.py +1 -0
requirements.txt +5 -6

app.py CHANGED Viewed

@@ -375,10 +375,9 @@ def run_demo_server(pipe):
 def main():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     marigold_pipe = Marigold()
     geowizard_pipe = Geowizard()
-    dsine_pipe = DSINE()
     our_pipe = StableNormal()
     run_demo_server([dsine_pipe, marigold_pipe, geowizard_pipe, our_pipe])

 def main():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    dsine_pipe = DSINE()
     marigold_pipe = Marigold()
     geowizard_pipe = Geowizard()
     our_pipe = StableNormal()
     run_demo_server([dsine_pipe, marigold_pipe, geowizard_pipe, our_pipe])

geffnet/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from .gen_efficientnet import *
+from .mobilenetv3 import *
+from .model_factory import create_model
+from .config import is_exportable, is_scriptable, set_exportable, set_scriptable
+from .activations import *

geffnet/activations/__init__.py ADDED Viewed

	@@ -0,0 +1,137 @@

+from geffnet import config
+from geffnet.activations.activations_me import *
+from geffnet.activations.activations_jit import *
+from geffnet.activations.activations import *
+import torch
+_has_silu = 'silu' in dir(torch.nn.functional)
+_ACT_FN_DEFAULT = dict(
+    silu=F.silu if _has_silu else swish,
+    swish=F.silu if _has_silu else swish,
+    mish=mish,
+    relu=F.relu,
+    relu6=F.relu6,
+    sigmoid=sigmoid,
+    tanh=tanh,
+    hard_sigmoid=hard_sigmoid,
+    hard_swish=hard_swish,
+)
+_ACT_FN_JIT = dict(
+    silu=F.silu if _has_silu else swish_jit,
+    swish=F.silu if _has_silu else swish_jit,
+    mish=mish_jit,
+)
+# Memory-efficient (custom autograd) activation functions. get_act_fn consults this table first
+# when the model is not being exported or scripted and jit has not been disabled.
+_ACT_FN_ME = dict(
+    silu=F.silu if _has_silu else swish_me,
+    swish=F.silu if _has_silu else swish_me,
+    mish=mish_me,
+    hard_swish=hard_swish_me,
+    # Registered under 'hard_sigmoid' so a lookup by the public activation name resolves here,
+    # consistent with the 'hard_sigmoid' key of _ACT_LAYER_ME.
+    hard_sigmoid=hard_sigmoid_me,
+    # Legacy misspelled key, kept so any existing lookup by this name still resolves.
+    hard_sigmoid_jit=hard_sigmoid_me,
+)
+_ACT_LAYER_DEFAULT = dict(
+    silu=nn.SiLU if _has_silu else Swish,
+    swish=nn.SiLU if _has_silu else Swish,
+    mish=Mish,
+    relu=nn.ReLU,
+    relu6=nn.ReLU6,
+    sigmoid=Sigmoid,
+    tanh=Tanh,
+    hard_sigmoid=HardSigmoid,
+    hard_swish=HardSwish,
+)
+_ACT_LAYER_JIT = dict(
+    silu=nn.SiLU if _has_silu else SwishJit,
+    swish=nn.SiLU if _has_silu else SwishJit,
+    mish=MishJit,
+)
+_ACT_LAYER_ME = dict(
+    silu=nn.SiLU if _has_silu else SwishMe,
+    swish=nn.SiLU if _has_silu else SwishMe,
+    mish=MishMe,
+    hard_swish=HardSwishMe,
+    hard_sigmoid=HardSigmoidMe
+)
+_OVERRIDE_FN = dict()
+_OVERRIDE_LAYER = dict()
+def add_override_act_fn(name, fn):
+    global _OVERRIDE_FN
+    _OVERRIDE_FN[name] = fn
+def update_override_act_fn(overrides):
+    assert isinstance(overrides, dict)
+    global _OVERRIDE_FN
+    _OVERRIDE_FN.update(overrides)
+def clear_override_act_fn():
+    global _OVERRIDE_FN
+    _OVERRIDE_FN = dict()
+def add_override_act_layer(name, fn):
+    _OVERRIDE_LAYER[name] = fn
+def update_override_act_layer(overrides):
+    assert isinstance(overrides, dict)
+    global _OVERRIDE_LAYER
+    _OVERRIDE_LAYER.update(overrides)
+def clear_override_act_layer():
+    global _OVERRIDE_LAYER
+    _OVERRIDE_LAYER = dict()
+def get_act_fn(name='relu'):
+    """ Activation Function Factory
+    Fetching activation fns by name with this function allows export or torch script friendly
+    functions to be returned dynamically based on current config.
+    """
+    if name in _OVERRIDE_FN:
+        return _OVERRIDE_FN[name]
+    use_me = not (config.is_exportable() or config.is_scriptable() or config.is_no_jit())
+    if use_me and name in _ACT_FN_ME:
+        # If not exporting or scripting the model, first look for a memory optimized version
+        # activation with custom autograd, then fallback to jit scripted, then a Python or Torch builtin
+        return _ACT_FN_ME[name]
+    if config.is_exportable() and name in ('silu', 'swish'):
+        # FIXME PyTorch SiLU doesn't ONNX export, this is a temp hack
+        return swish
+    use_jit = not (config.is_exportable() or config.is_no_jit())
+    # NOTE: export tracing should work with jit scripted components, but I keep running into issues
+    if use_jit and name in _ACT_FN_JIT:  # jit scripted models should be okay for export/scripting
+        return _ACT_FN_JIT[name]
+    return _ACT_FN_DEFAULT[name]
+def get_act_layer(name='relu'):
+    """ Activation Layer Factory
+    Fetching activation layers by name with this function allows export or torch script friendly
+    functions to be returned dynamically based on current config.
+
+    Lookup order: explicit overrides, memory-efficient (custom autograd) layers, the plain Swish
+    stand-in for 'silu'/'swish' when exporting, jit scripted layers, then the default layers.
+    Raises KeyError if `name` is not present in any of those tables.
+    """
+    if name in _OVERRIDE_LAYER:
+        return _OVERRIDE_LAYER[name]
+    use_me = not (config.is_exportable() or config.is_scriptable() or config.is_no_jit())
+    if use_me and name in _ACT_LAYER_ME:
+        # If not exporting or scripting the model, prefer the memory optimized layer
+        return _ACT_LAYER_ME[name]
+    if config.is_exportable() and name in ('silu', 'swish'):
+        # FIXME PyTorch SiLU doesn't ONNX export, this is a temp hack
+        return Swish
+    use_jit = not (config.is_exportable() or config.is_no_jit())
+    # NOTE: export tracing should work with jit scripted components, but I keep running into issues
+    # Test membership against the layer table that is indexed below (not the function table),
+    # so the check and the lookup can never disagree.
+    if use_jit and name in _ACT_LAYER_JIT:  # jit scripted models should be okay for export/scripting
+        return _ACT_LAYER_JIT[name]
+    return _ACT_LAYER_DEFAULT[name]

geffnet/activations/activations.py ADDED Viewed

	@@ -0,0 +1,102 @@

+""" Activations
+A collection of activations fn and modules with a common interface so that they can
+easily be swapped. All have an `inplace` arg even if not used.
+Copyright 2020 Ross Wightman
+"""
+from torch import nn as nn
+from torch.nn import functional as F
+def swish(x, inplace: bool = False):
+    """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3)
+    and also as Swish (https://arxiv.org/abs/1710.05941).
+    TODO Rename to SiLU with addition to PyTorch
+    """
+    return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())
+class Swish(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(Swish, self).__init__()
+        self.inplace = inplace
+    def forward(self, x):
+        return swish(x, self.inplace)
+def mish(x, inplace: bool = False):
+    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
+    """
+    return x.mul(F.softplus(x).tanh())
+class Mish(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(Mish, self).__init__()
+        self.inplace = inplace
+    def forward(self, x):
+        return mish(x, self.inplace)
+def sigmoid(x, inplace: bool = False):
+    return x.sigmoid_() if inplace else x.sigmoid()
+# PyTorch has this, but not with a consistent inplace argument interface
+class Sigmoid(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(Sigmoid, self).__init__()
+        self.inplace = inplace
+    def forward(self, x):
+        return x.sigmoid_() if self.inplace else x.sigmoid()
+def tanh(x, inplace: bool = False):
+    return x.tanh_() if inplace else x.tanh()
+# PyTorch has this, but not with a consistent inplace argument interface
+class Tanh(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(Tanh, self).__init__()
+        self.inplace = inplace
+    def forward(self, x):
+        return x.tanh_() if self.inplace else x.tanh()
+def hard_swish(x, inplace: bool = False):
+    inner = F.relu6(x + 3.).div_(6.)
+    return x.mul_(inner) if inplace else x.mul(inner)
+class HardSwish(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(HardSwish, self).__init__()
+        self.inplace = inplace
+    def forward(self, x):
+        return hard_swish(x, self.inplace)
+def hard_sigmoid(x, inplace: bool = False):
+    if inplace:
+        return x.add_(3.).clamp_(0., 6.).div_(6.)
+    else:
+        return F.relu6(x + 3.) / 6.
+class HardSigmoid(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(HardSigmoid, self).__init__()
+        self.inplace = inplace
+    def forward(self, x):
+        return hard_sigmoid(x, self.inplace)

geffnet/activations/activations_jit.py ADDED Viewed

	@@ -0,0 +1,79 @@

+""" Activations (jit)
+A collection of jit-scripted activations fn and modules with a common interface so that they can
+easily be swapped. All have an `inplace` arg even if not used.
+All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not
+currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted
+versions if they contain in-place ops.
+Copyright 2020 Ross Wightman
+"""
+import torch
+from torch import nn as nn
+from torch.nn import functional as F
+__all__ = ['swish_jit', 'SwishJit', 'mish_jit', 'MishJit',
+           'hard_sigmoid_jit', 'HardSigmoidJit', 'hard_swish_jit', 'HardSwishJit']
+@torch.jit.script
+def swish_jit(x, inplace: bool = False):
+    """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3)
+    and also as Swish (https://arxiv.org/abs/1710.05941).
+    TODO Rename to SiLU with addition to PyTorch
+    """
+    return x.mul(x.sigmoid())
+@torch.jit.script
+def mish_jit(x, _inplace: bool = False):
+    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
+    """
+    return x.mul(F.softplus(x).tanh())
+class SwishJit(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(SwishJit, self).__init__()
+    def forward(self, x):
+        return swish_jit(x)
+class MishJit(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(MishJit, self).__init__()
+    def forward(self, x):
+        return mish_jit(x)
+@torch.jit.script
+def hard_sigmoid_jit(x, inplace: bool = False):
+    # return F.relu6(x + 3.) / 6.
+    return (x + 3).clamp(min=0, max=6).div(6.)  # clamp seems ever so slightly faster?
+class HardSigmoidJit(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(HardSigmoidJit, self).__init__()
+    def forward(self, x):
+        return hard_sigmoid_jit(x)
+@torch.jit.script
+def hard_swish_jit(x, inplace: bool = False):
+    # return x * (F.relu6(x + 3.) / 6)
+    return x * (x + 3).clamp(min=0, max=6).div(6.)  # clamp seems ever so slightly faster?
+class HardSwishJit(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(HardSwishJit, self).__init__()
+    def forward(self, x):
+        return hard_swish_jit(x)

geffnet/activations/activations_me.py ADDED Viewed

	@@ -0,0 +1,170 @@

+""" Activations (memory-efficient w/ custom autograd)
+A collection of activations fn and modules with a common interface so that they can
+easily be swapped. All have an `inplace` arg even if not used.
+These activations are not compatible with jit scripting or ONNX export of the model, please use either
+the JIT or basic versions of the activations.
+Copyright 2020 Ross Wightman
+"""
+import torch
+from torch import nn as nn
+from torch.nn import functional as F
+__all__ = ['swish_me', 'SwishMe', 'mish_me', 'MishMe',
+           'hard_sigmoid_me', 'HardSigmoidMe', 'hard_swish_me', 'HardSwishMe']
+@torch.jit.script
+def swish_jit_fwd(x):
+    return x.mul(torch.sigmoid(x))
+@torch.jit.script
+def swish_jit_bwd(x, grad_output):
+    x_sigmoid = torch.sigmoid(x)
+    return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid)))
+class SwishJitAutoFn(torch.autograd.Function):
+    """ torch.jit.script optimised Swish w/ memory-efficient checkpoint
+    Inspired by conversation btw Jeremy Howard & Adam Paszke
+    https://twitter.com/jeremyphoward/status/1188251041835315200
+    Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3)
+    and also as Swish (https://arxiv.org/abs/1710.05941).
+    TODO Rename to SiLU with addition to PyTorch
+    """
+    @staticmethod
+    def forward(ctx, x):
+        # Only the input is saved; swish_jit_bwd recomputes the sigmoid from it in backward.
+        ctx.save_for_backward(x)
+        return swish_jit_fwd(x)
+    @staticmethod
+    def backward(ctx, grad_output):
+        x = ctx.saved_tensors[0]
+        return swish_jit_bwd(x, grad_output)
+def swish_me(x, inplace=False):
+    return SwishJitAutoFn.apply(x)
+class SwishMe(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(SwishMe, self).__init__()
+    def forward(self, x):
+        return SwishJitAutoFn.apply(x)
+@torch.jit.script
+def mish_jit_fwd(x):
+    return x.mul(torch.tanh(F.softplus(x)))
+@torch.jit.script
+def mish_jit_bwd(x, grad_output):
+    x_sigmoid = torch.sigmoid(x)
+    x_tanh_sp = F.softplus(x).tanh()
+    return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp))
+class MishJitAutoFn(torch.autograd.Function):
+    """ Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
+    A memory efficient, jit scripted variant of Mish
+    """
+    @staticmethod
+    def forward(ctx, x):
+        ctx.save_for_backward(x)
+        return mish_jit_fwd(x)
+    @staticmethod
+    def backward(ctx, grad_output):
+        x = ctx.saved_tensors[0]
+        return mish_jit_bwd(x, grad_output)
+def mish_me(x, inplace=False):
+    return MishJitAutoFn.apply(x)
+class MishMe(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(MishMe, self).__init__()
+    def forward(self, x):
+        return MishJitAutoFn.apply(x)
+def hard_sigmoid_jit_fwd(x, inplace: bool = False):
+    return (x + 3).clamp(min=0, max=6).div(6.)
+def hard_sigmoid_jit_bwd(x, grad_output):
+    m = torch.ones_like(x) * ((x >= -3.) & (x <= 3.)) / 6.
+    return grad_output * m
+class HardSigmoidJitAutoFn(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        ctx.save_for_backward(x)
+        return hard_sigmoid_jit_fwd(x)
+    @staticmethod
+    def backward(ctx, grad_output):
+        x = ctx.saved_tensors[0]
+        return hard_sigmoid_jit_bwd(x, grad_output)
+def hard_sigmoid_me(x, inplace: bool = False):
+    return HardSigmoidJitAutoFn.apply(x)
+class HardSigmoidMe(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(HardSigmoidMe, self).__init__()
+    def forward(self, x):
+        return HardSigmoidJitAutoFn.apply(x)
+def hard_swish_jit_fwd(x):
+    return x * (x + 3).clamp(min=0, max=6).div(6.)
+def hard_swish_jit_bwd(x, grad_output):
+    m = torch.ones_like(x) * (x >= 3.)
+    m = torch.where((x >= -3.) & (x <= 3.),  x / 3. + .5, m)
+    return grad_output * m
+class HardSwishJitAutoFn(torch.autograd.Function):
+    """A memory efficient, jit-scripted HardSwish activation"""
+    @staticmethod
+    def forward(ctx, x):
+        ctx.save_for_backward(x)
+        return hard_swish_jit_fwd(x)
+    @staticmethod
+    def backward(ctx, grad_output):
+        x = ctx.saved_tensors[0]
+        return hard_swish_jit_bwd(x, grad_output)
+def hard_swish_me(x, inplace=False):
+    return HardSwishJitAutoFn.apply(x)
+class HardSwishMe(nn.Module):
+    def __init__(self, inplace: bool = False):
+        super(HardSwishMe, self).__init__()
+    def forward(self, x):
+        return HardSwishJitAutoFn.apply(x)

geffnet/config.py ADDED Viewed

	@@ -0,0 +1,123 @@

+""" Global layer config state
+"""
+from typing import Any, Optional
+__all__ = [
+    'is_exportable', 'is_scriptable', 'is_no_jit', 'layer_config_kwargs',
+    'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config'
+]
+# Set to True if prefer to have layers with no jit optimization (includes activations)
+_NO_JIT = False
+# Set to True if prefer to have activation layers with no jit optimization
+# NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying
+# the jit flags so far are activations. This will change as more layers are updated and/or added.
+_NO_ACTIVATION_JIT = False
+# Set to True if exporting a model with Same padding via ONNX
+_EXPORTABLE = False
+# Set to True if wanting to use torch.jit.script on a model
+_SCRIPTABLE = False
+def is_no_jit():
+    return _NO_JIT
+class set_no_jit:
+    def __init__(self, mode: bool) -> None:
+        global _NO_JIT
+        self.prev = _NO_JIT
+        _NO_JIT = mode
+    def __enter__(self) -> None:
+        pass
+    def __exit__(self, *args: Any) -> bool:
+        global _NO_JIT
+        _NO_JIT = self.prev
+        return False
+def is_exportable():
+    return _EXPORTABLE
+class set_exportable:
+    def __init__(self, mode: bool) -> None:
+        global _EXPORTABLE
+        self.prev = _EXPORTABLE
+        _EXPORTABLE = mode
+    def __enter__(self) -> None:
+        pass
+    def __exit__(self, *args: Any) -> bool:
+        global _EXPORTABLE
+        _EXPORTABLE = self.prev
+        return False
+def is_scriptable():
+    return _SCRIPTABLE
+class set_scriptable:
+    def __init__(self, mode: bool) -> None:
+        global _SCRIPTABLE
+        self.prev = _SCRIPTABLE
+        _SCRIPTABLE = mode
+    def __enter__(self) -> None:
+        pass
+    def __exit__(self, *args: Any) -> bool:
+        global _SCRIPTABLE
+        _SCRIPTABLE = self.prev
+        return False
+class set_layer_config:
+    """ Layer config context manager that allows setting all layer config flags at once.
+    If a flag arg is None, it will not change the current value.
+    """
+    def __init__(
+            self,
+            scriptable: Optional[bool] = None,
+            exportable: Optional[bool] = None,
+            no_jit: Optional[bool] = None,
+            no_activation_jit: Optional[bool] = None):
+        global _SCRIPTABLE
+        global _EXPORTABLE
+        global _NO_JIT
+        global _NO_ACTIVATION_JIT
+        self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT
+        if scriptable is not None:
+            _SCRIPTABLE = scriptable
+        if exportable is not None:
+            _EXPORTABLE = exportable
+        if no_jit is not None:
+            _NO_JIT = no_jit
+        if no_activation_jit is not None:
+            _NO_ACTIVATION_JIT = no_activation_jit
+    def __enter__(self) -> None:
+        pass
+    def __exit__(self, *args: Any) -> bool:
+        global _SCRIPTABLE
+        global _EXPORTABLE
+        global _NO_JIT
+        global _NO_ACTIVATION_JIT
+        _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev
+        return False
+def layer_config_kwargs(kwargs):
+    """ Consume config kwargs and return contextmgr obj """
+    return set_layer_config(
+        scriptable=kwargs.pop('scriptable', None),
+        exportable=kwargs.pop('exportable', None),
+        no_jit=kwargs.pop('no_jit', None))

geffnet/conv2d_layers.py ADDED Viewed

	@@ -0,0 +1,304 @@

+""" Conv2D w/ SAME padding, CondConv, MixedConv
+A collection of conv layers and padding helpers needed by EfficientNet, MixNet, and
+MobileNetV3 models that maintain weight compatibility with original Tensorflow models.
+Copyright 2020 Ross Wightman
+"""
+import collections.abc
+import math
+from functools import partial
+from itertools import repeat
+from typing import Tuple, Optional
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .config import *
+# From PyTorch internals
+def _ntuple(n):
+    def parse(x):
+        if isinstance(x, collections.abc.Iterable):
+            return x
+        return tuple(repeat(x, n))
+    return parse
+_single = _ntuple(1)
+_pair = _ntuple(2)
+_triple = _ntuple(3)
+_quadruple = _ntuple(4)
+def _is_static_pad(kernel_size, stride=1, dilation=1, **_):
+    return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0
+def _get_padding(kernel_size, stride=1, dilation=1, **_):
+    padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2
+    return padding
+def _calc_same_pad(i: int, k: int, s: int, d: int):
+    """Return the total (non-negative) padding for one dimension.
+
+    Args:
+        i: input size along the dimension.
+        k: kernel size along the dimension.
+        s: stride.
+        d: dilation.
+
+    The result is how much the input must grow so that ceil(i / s) windows of
+    effective size (k - 1) * d + 1, spaced s apart, fit; it is clamped at 0.
+    """
+    # -(i // -s) is ceil(i / s) using integer arithmetic only.
+    return max((-(i // -s) - 1) * s + (k - 1) * d + 1 - i, 0)
+def _same_pad_arg(input_size, kernel_size, stride, dilation):
+    ih, iw = input_size
+    kh, kw = kernel_size
+    pad_h = _calc_same_pad(ih, kh, stride[0], dilation[0])
+    pad_w = _calc_same_pad(iw, kw, stride[1], dilation[1])
+    return [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]
+def _split_channels(num_chan, num_groups):
+    split = [num_chan // num_groups for _ in range(num_groups)]
+    split[0] += num_chan - sum(split)
+    return split
+def conv2d_same(
+        x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1),
+        padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1):
+    ih, iw = x.size()[-2:]
+    kh, kw = weight.size()[-2:]
+    pad_h = _calc_same_pad(ih, kh, stride[0], dilation[0])
+    pad_w = _calc_same_pad(iw, kw, stride[1], dilation[1])
+    x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
+    return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups)
+class Conv2dSame(nn.Conv2d):
+    """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions
+    """
+    # pylint: disable=unused-argument
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True):
+        super(Conv2dSame, self).__init__(
+            in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
+    def forward(self, x):
+        return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
+class Conv2dSameExport(nn.Conv2d):
+    """ ONNX export friendly Tensorflow like 'SAME' convolution wrapper for 2D convolutions
+    NOTE: This does not currently work with torch.jit.script
+    """
+    # pylint: disable=unused-argument
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True):
+        super(Conv2dSameExport, self).__init__(
+            in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
+        self.pad = None
+        self.pad_input_size = (0, 0)
+    def forward(self, x):
+        input_size = x.size()[-2:]
+        if self.pad is None:
+            pad_arg = _same_pad_arg(input_size, self.weight.size()[-2:], self.stride, self.dilation)
+            self.pad = nn.ZeroPad2d(pad_arg)
+            self.pad_input_size = input_size
+        if self.pad is not None:
+            x = self.pad(x)
+        return F.conv2d(
+            x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
+def get_padding_value(padding, kernel_size, **kwargs):
+    dynamic = False
+    if isinstance(padding, str):
+        # for any string padding, the padding will be calculated for you, one of three ways
+        padding = padding.lower()
+        if padding == 'same':
+            # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact
+            if _is_static_pad(kernel_size, **kwargs):
+                # static case, no extra overhead
+                padding = _get_padding(kernel_size, **kwargs)
+            else:
+                # dynamic padding
+                padding = 0
+                dynamic = True
+        elif padding == 'valid':
+            # 'VALID' padding, same as padding=0
+            padding = 0
+        else:
+            # Default to PyTorch style 'same'-ish symmetric padding
+            padding = _get_padding(kernel_size, **kwargs)
+    return padding, dynamic
+def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs):
+    padding = kwargs.pop('padding', '')
+    kwargs.setdefault('bias', False)
+    padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs)
+    if is_dynamic:
+        if is_exportable():
+            assert not is_scriptable()
+            return Conv2dSameExport(in_chs, out_chs, kernel_size, **kwargs)
+        else:
+            return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs)
+    else:
+        return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs)
+class MixedConv2d(nn.ModuleDict):
+    """ Mixed Grouped Convolution
+    Based on MDConv and GroupedConv in MixNet impl:
+      https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py
+    """
+    def __init__(self, in_channels, out_channels, kernel_size=3,
+                 stride=1, padding='', dilation=1, depthwise=False, **kwargs):
+        super(MixedConv2d, self).__init__()
+        kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size]
+        num_groups = len(kernel_size)
+        in_splits = _split_channels(in_channels, num_groups)
+        out_splits = _split_channels(out_channels, num_groups)
+        self.in_channels = sum(in_splits)
+        self.out_channels = sum(out_splits)
+        for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)):
+            conv_groups = out_ch if depthwise else 1
+            self.add_module(
+                str(idx),
+                create_conv2d_pad(
+                    in_ch, out_ch, k, stride=stride,
+                    padding=padding, dilation=dilation, groups=conv_groups, **kwargs)
+            )
+        self.splits = in_splits
+    def forward(self, x):
+        x_split = torch.split(x, self.splits, 1)
+        x_out = [conv(x_split[i]) for i, conv in enumerate(self.values())]
+        x = torch.cat(x_out, 1)
+        return x
+def get_condconv_initializer(initializer, num_experts, expert_shape):
+    def condconv_initializer(weight):
+        """CondConv initializer function."""
+        num_params = np.prod(expert_shape)
+        if (len(weight.shape) != 2 or weight.shape[0] != num_experts or
+                weight.shape[1] != num_params):
+            raise (ValueError(
+                'CondConv variables must have shape [num_experts, num_params]'))
+        for i in range(num_experts):
+            initializer(weight[i].view(expert_shape))
+    return condconv_initializer
+class CondConv2d(nn.Module):
+    """ Conditional Convolution
+    Inspired by: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/condconv/condconv_layers.py
+    Grouped convolution hackery for parallel execution of the per-sample kernel filters inspired by this discussion:
+    https://github.com/pytorch/pytorch/issues/17983
+    """
+    __constants__ = ['bias', 'in_channels', 'out_channels', 'dynamic_padding']
+    def __init__(self, in_channels, out_channels, kernel_size=3,
+                 stride=1, padding='', dilation=1, groups=1, bias=False, num_experts=4):
+        super(CondConv2d, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = _pair(kernel_size)
+        self.stride = _pair(stride)
+        padding_val, is_padding_dynamic = get_padding_value(
+            padding, kernel_size, stride=stride, dilation=dilation)
+        self.dynamic_padding = is_padding_dynamic  # if in forward to work with torchscript
+        self.padding = _pair(padding_val)
+        self.dilation = _pair(dilation)
+        self.groups = groups
+        self.num_experts = num_experts
+        self.weight_shape = (self.out_channels, self.in_channels // self.groups) + self.kernel_size
+        weight_num_param = 1
+        for wd in self.weight_shape:
+            weight_num_param *= wd
+        self.weight = torch.nn.Parameter(torch.Tensor(self.num_experts, weight_num_param))
+        if bias:
+            self.bias_shape = (self.out_channels,)
+            self.bias = torch.nn.Parameter(torch.Tensor(self.num_experts, self.out_channels))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+    def reset_parameters(self):
+        init_weight = get_condconv_initializer(
+            partial(nn.init.kaiming_uniform_, a=math.sqrt(5)), self.num_experts, self.weight_shape)
+        init_weight(self.weight)
+        if self.bias is not None:
+            fan_in = np.prod(self.weight_shape[1:])
+            bound = 1 / math.sqrt(fan_in)
+            init_bias = get_condconv_initializer(
+                partial(nn.init.uniform_, a=-bound, b=bound), self.num_experts, self.bias_shape)
+            init_bias(self.bias)
+    def forward(self, x, routing_weights):
+        B, C, H, W = x.shape
+        weight = torch.matmul(routing_weights, self.weight)
+        new_weight_shape = (B * self.out_channels, self.in_channels // self.groups) + self.kernel_size
+        weight = weight.view(new_weight_shape)
+        bias = None
+        if self.bias is not None:
+            bias = torch.matmul(routing_weights, self.bias)
+            bias = bias.view(B * self.out_channels)
+        # move batch elements with channels so each batch element can be efficiently convolved with separate kernel
+        x = x.view(1, B * C, H, W)
+        if self.dynamic_padding:
+            out = conv2d_same(
+                x, weight, bias, stride=self.stride, padding=self.padding,
+                dilation=self.dilation, groups=self.groups * B)
+        else:
+            out = F.conv2d(
+                x, weight, bias, stride=self.stride, padding=self.padding,
+                dilation=self.dilation, groups=self.groups * B)
+        out = out.permute([1, 0, 2, 3]).view(B, self.out_channels, out.shape[-2], out.shape[-1])
+        # Literal port (from TF definition)
+        # x = torch.split(x, 1, 0)
+        # weight = torch.split(weight, 1, 0)
+        # if self.bias is not None:
+        #     bias = torch.matmul(routing_weights, self.bias)
+        #     bias = torch.split(bias, 1, 0)
+        # else:
+        #     bias = [None] * B
+        # out = []
+        # for xi, wi, bi in zip(x, weight, bias):
+        #     wi = wi.view(*self.weight_shape)
+        #     if bi is not None:
+        #         bi = bi.view(*self.bias_shape)
+        #     out.append(self.conv_fn(
+        #         xi, wi, bi, stride=self.stride, padding=self.padding,
+        #         dilation=self.dilation, groups=self.groups))
+        # out = torch.cat(out, 0)
+        return out
+def select_conv2d(in_chs, out_chs, kernel_size, **kwargs):
+    assert 'groups' not in kwargs  # only use 'depthwise' bool arg
+    if isinstance(kernel_size, list):
+        assert 'num_experts' not in kwargs  # MixNet + CondConv combo not supported currently
+        # We're going to use only lists for defining the MixedConv2d kernel groups,
+        # ints, tuples, other iterables will continue to pass to normal conv and specify h, w.
+        m = MixedConv2d(in_chs, out_chs, kernel_size, **kwargs)
+    else:
+        depthwise = kwargs.pop('depthwise', False)
+        groups = out_chs if depthwise else 1
+        if 'num_experts' in kwargs and kwargs['num_experts'] > 0:
+            m = CondConv2d(in_chs, out_chs, kernel_size, groups=groups, **kwargs)
+        else:
+            m = create_conv2d_pad(in_chs, out_chs, kernel_size, groups=groups, **kwargs)
+    return m

geffnet/efficientnet_builder.py ADDED Viewed

	@@ -0,0 +1,683 @@

+""" EfficientNet / MobileNetV3 Blocks and Builder
+Copyright 2020 Ross Wightman
+"""
+import re
+from copy import deepcopy
+from .conv2d_layers import *
+from geffnet.activations import *
+# Public API of this module: a star import of it exposes exactly the names listed here.
+__all__ = ['get_bn_args_tf', 'resolve_bn_args', 'resolve_se_args', 'resolve_act_layer', 'make_divisible',
+           'round_channels', 'drop_connect', 'SqueezeExcite', 'ConvBnAct', 'DepthwiseSeparableConv',
+           'InvertedResidual', 'CondConvResidual', 'EdgeResidual', 'EfficientNetBuilder', 'decode_arch_def',
+           'initialize_weight_default', 'initialize_weight_goog', 'BN_MOMENTUM_TF_DEFAULT', 'BN_EPS_TF_DEFAULT'
+]
+# Defaults used for Google/Tensorflow training of mobile networks w/ RMSprop as per
+# papers and TF reference implementations. The PyTorch momentum equivalent of a TF decay is (1 - TF decay).
+# NOTE: the TF-side value varies between .99 and .9997 depending on source
+#   .99 in official TF TPU impl
+#   .9997 (w/ .999 in search space) for paper
+#
+# PyTorch defaults are momentum = .1, eps = 1e-5
+#
+BN_MOMENTUM_TF_DEFAULT = 1 - 0.99
+BN_EPS_TF_DEFAULT = 1e-3
+# Template dict; get_bn_args_tf() returns copies of it.
+_BN_ARGS_TF = dict(momentum=BN_MOMENTUM_TF_DEFAULT, eps=BN_EPS_TF_DEFAULT)
+def get_bn_args_tf():
+    return _BN_ARGS_TF.copy()
+def resolve_bn_args(kwargs):
+    bn_args = get_bn_args_tf() if kwargs.pop('bn_tf', False) else {}
+    bn_momentum = kwargs.pop('bn_momentum', None)
+    if bn_momentum is not None:
+        bn_args['momentum'] = bn_momentum
+    bn_eps = kwargs.pop('bn_eps', None)
+    if bn_eps is not None:
+        bn_args['eps'] = bn_eps
+    return bn_args
+# Fallback squeeze-excite settings; resolve_se_args() fills in any key the caller left out.
+_SE_ARGS_DEFAULT = dict(
+    gate_fn=sigmoid,
+    act_layer=None,  # None == use containing block's activation layer
+    reduce_mid=False,  # falsy == resolve_se_args() sets reduced_base_chs to the in_chs it is given
+    divisor=1)
+def resolve_se_args(kwargs, in_chs, act_layer=None):
+    se_kwargs = kwargs.copy() if kwargs is not None else {}
+    # fill in args that aren't specified with the defaults
+    for k, v in _SE_ARGS_DEFAULT.items():
+        se_kwargs.setdefault(k, v)
+    # some models, like MobilNetV3, calculate SE reduction chs from the containing block's mid_ch instead of in_ch
+    if not se_kwargs.pop('reduce_mid'):
+        se_kwargs['reduced_base_chs'] = in_chs
+    # act_layer override, if it remains None, the containing block's act_layer will be used
+    if se_kwargs['act_layer'] is None:
+        assert act_layer is not None
+        se_kwargs['act_layer'] = act_layer
+    return se_kwargs
+def resolve_act_layer(kwargs, default='relu'):
+    act_layer = kwargs.pop('act_layer', default)
+    if isinstance(act_layer, str):
+        act_layer = get_act_layer(act_layer)
+    return act_layer
+def make_divisible(v: int, divisor: int = 8, min_value: int = None):
+    min_value = min_value or divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    if new_v < 0.9 * v:  # ensure round down does not go down by more than 10%.
+        new_v += divisor
+    return new_v
+def round_channels(channels, multiplier=1.0, divisor=8, channel_min=None):
+    """Round number of filters based on depth multiplier."""
+    if not multiplier:
+        return channels
+    channels *= multiplier
+    return make_divisible(channels, divisor, channel_min)
+def drop_connect(inputs, training: bool = False, drop_connect_rate: float = 0.):
+    """Apply drop connect."""
+    if not training:
+        return inputs
+    keep_prob = 1 - drop_connect_rate
+    random_tensor = keep_prob + torch.rand(
+        (inputs.size()[0], 1, 1, 1), dtype=inputs.dtype, device=inputs.device)
+    random_tensor.floor_()  # binarize
+    output = inputs.div(keep_prob) * random_tensor
+    return output
+class SqueezeExcite(nn.Module):
+    def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None, act_layer=nn.ReLU, gate_fn=sigmoid, divisor=1):
+        super(SqueezeExcite, self).__init__()
+        reduced_chs = make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor)
+        self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
+        self.act1 = act_layer(inplace=True)
+        self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True)
+        self.gate_fn = gate_fn
+    def forward(self, x):
+        x_se = x.mean((2, 3), keepdim=True)
+        x_se = self.conv_reduce(x_se)
+        x_se = self.act1(x_se)
+        x_se = self.conv_expand(x_se)
+        x = x * self.gate_fn(x_se)
+        return x
+class ConvBnAct(nn.Module):
+    def __init__(self, in_chs, out_chs, kernel_size,
+                 stride=1, pad_type='', act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, norm_kwargs=None):
+        super(ConvBnAct, self).__init__()
+        assert stride in [1, 2]
+        norm_kwargs = norm_kwargs or {}
+        self.conv = select_conv2d(in_chs, out_chs, kernel_size, stride=stride, padding=pad_type)
+        self.bn1 = norm_layer(out_chs, **norm_kwargs)
+        self.act1 = act_layer(inplace=True)
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        return x
+class DepthwiseSeparableConv(nn.Module):
+    """ DepthwiseSeparable block
+    Used for DS convs in MobileNet-V1 and in the place of IR blocks with an expansion
+    factor of 1.0. This is an alternative to having a IR with optional first pw conv.
+    """
+    def __init__(self, in_chs, out_chs, dw_kernel_size=3,
+                 stride=1, pad_type='', act_layer=nn.ReLU, noskip=False,
+                 pw_kernel_size=1, pw_act=False, se_ratio=0., se_kwargs=None,
+                 norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.):
+        super(DepthwiseSeparableConv, self).__init__()
+        assert stride in [1, 2]
+        norm_kwargs = norm_kwargs or {}
+        self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip
+        self.drop_connect_rate = drop_connect_rate
+        self.conv_dw = select_conv2d(
+            in_chs, in_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True)
+        self.bn1 = norm_layer(in_chs, **norm_kwargs)
+        self.act1 = act_layer(inplace=True)
+        # Squeeze-and-excitation
+        if se_ratio is not None and se_ratio > 0.:
+            se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
+            self.se = SqueezeExcite(in_chs, se_ratio=se_ratio, **se_kwargs)
+        else:
+            self.se = nn.Identity()
+        self.conv_pw = select_conv2d(in_chs, out_chs, pw_kernel_size, padding=pad_type)
+        self.bn2 = norm_layer(out_chs, **norm_kwargs)
+        self.act2 = act_layer(inplace=True) if pw_act else nn.Identity()
+    def forward(self, x):
+        residual = x
+        x = self.conv_dw(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        x = self.se(x)
+        x = self.conv_pw(x)
+        x = self.bn2(x)
+        x = self.act2(x)
+        if self.has_residual:
+            if self.drop_connect_rate > 0.:
+                x = drop_connect(x, self.training, self.drop_connect_rate)
+            x += residual
+        return x
+class InvertedResidual(nn.Module):
+    """ Inverted residual block w/ optional SE"""
+    def __init__(self, in_chs, out_chs, dw_kernel_size=3,
+                 stride=1, pad_type='', act_layer=nn.ReLU, noskip=False,
+                 exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
+                 se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
+                 conv_kwargs=None, drop_connect_rate=0.):
+        super(InvertedResidual, self).__init__()
+        norm_kwargs = norm_kwargs or {}
+        conv_kwargs = conv_kwargs or {}
+        mid_chs: int = make_divisible(in_chs * exp_ratio)
+        self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
+        self.drop_connect_rate = drop_connect_rate
+        # Point-wise expansion
+        self.conv_pw = select_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs)
+        self.bn1 = norm_layer(mid_chs, **norm_kwargs)
+        self.act1 = act_layer(inplace=True)
+        # Depth-wise convolution
+        self.conv_dw = select_conv2d(
+            mid_chs, mid_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True, **conv_kwargs)
+        self.bn2 = norm_layer(mid_chs, **norm_kwargs)
+        self.act2 = act_layer(inplace=True)
+        # Squeeze-and-excitation
+        if se_ratio is not None and se_ratio > 0.:
+            se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
+            self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs)
+        else:
+            self.se = nn.Identity()  # for jit.script compat
+        # Point-wise linear projection
+        self.conv_pwl = select_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type, **conv_kwargs)
+        self.bn3 = norm_layer(out_chs, **norm_kwargs)
+    def forward(self, x):
+        residual = x
+        # Point-wise expansion
+        x = self.conv_pw(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        # Depth-wise convolution
+        x = self.conv_dw(x)
+        x = self.bn2(x)
+        x = self.act2(x)
+        # Squeeze-and-excitation
+        x = self.se(x)
+        # Point-wise linear projection
+        x = self.conv_pwl(x)
+        x = self.bn3(x)
+        if self.has_residual:
+            if self.drop_connect_rate > 0.:
+                x = drop_connect(x, self.training, self.drop_connect_rate)
+            x += residual
+        return x
+class CondConvResidual(InvertedResidual):
+    """ Inverted residual block w/ CondConv routing"""
+    def __init__(self, in_chs, out_chs, dw_kernel_size=3,
+                 stride=1, pad_type='', act_layer=nn.ReLU, noskip=False,
+                 exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
+                 se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
+                 num_experts=0, drop_connect_rate=0.):
+        self.num_experts = num_experts
+        conv_kwargs = dict(num_experts=self.num_experts)
+        super(CondConvResidual, self).__init__(
+            in_chs, out_chs, dw_kernel_size=dw_kernel_size, stride=stride, pad_type=pad_type,
+            act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size,
+            pw_kernel_size=pw_kernel_size, se_ratio=se_ratio, se_kwargs=se_kwargs,
+            norm_layer=norm_layer, norm_kwargs=norm_kwargs, conv_kwargs=conv_kwargs,
+            drop_connect_rate=drop_connect_rate)
+        self.routing_fn = nn.Linear(in_chs, self.num_experts)
+    def forward(self, x):
+        residual = x
+        # CondConv routing
+        pooled_inputs = F.adaptive_avg_pool2d(x, 1).flatten(1)
+        routing_weights = torch.sigmoid(self.routing_fn(pooled_inputs))
+        # Point-wise expansion
+        x = self.conv_pw(x, routing_weights)
+        x = self.bn1(x)
+        x = self.act1(x)
+        # Depth-wise convolution
+        x = self.conv_dw(x, routing_weights)
+        x = self.bn2(x)
+        x = self.act2(x)
+        # Squeeze-and-excitation
+        x = self.se(x)
+        # Point-wise linear projection
+        x = self.conv_pwl(x, routing_weights)
+        x = self.bn3(x)
+        if self.has_residual:
+            if self.drop_connect_rate > 0.:
+                x = drop_connect(x, self.training, self.drop_connect_rate)
+            x += residual
+        return x
+class EdgeResidual(nn.Module):
+    """ EdgeTPU Residual block with expansion convolution followed by pointwise-linear w/ stride"""
+    def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0,
+                 stride=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1,
+                 se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.):
+        super(EdgeResidual, self).__init__()
+        norm_kwargs = norm_kwargs or {}
+        mid_chs = make_divisible(fake_in_chs * exp_ratio) if fake_in_chs > 0 else make_divisible(in_chs * exp_ratio)
+        self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
+        self.drop_connect_rate = drop_connect_rate
+        # Expansion convolution
+        self.conv_exp = select_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type)
+        self.bn1 = norm_layer(mid_chs, **norm_kwargs)
+        self.act1 = act_layer(inplace=True)
+        # Squeeze-and-excitation
+        if se_ratio is not None and se_ratio > 0.:
+            se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
+            self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs)
+        else:
+            self.se = nn.Identity()
+        # Point-wise linear projection
+        self.conv_pwl = select_conv2d(mid_chs, out_chs, pw_kernel_size, stride=stride, padding=pad_type)
+        self.bn2 = nn.BatchNorm2d(out_chs, **norm_kwargs)
+    def forward(self, x):
+        residual = x
+        # Expansion convolution
+        x = self.conv_exp(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        # Squeeze-and-excitation
+        x = self.se(x)
+        # Point-wise linear projection
+        x = self.conv_pwl(x)
+        x = self.bn2(x)
+        if self.has_residual:
+            if self.drop_connect_rate > 0.:
+                x = drop_connect(x, self.training, self.drop_connect_rate)
+            x += residual
+        return x
+class EfficientNetBuilder:
+    """ Build Trunk Blocks for Efficient/Mobile Networks
+    This ended up being somewhat of a cross between
+    https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_models.py
+    and
+    https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py
+    """
+    def __init__(self, channel_multiplier=1.0, channel_divisor=8, channel_min=None,
+                 pad_type='', act_layer=None, se_kwargs=None,
+                 norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.):
+        self.channel_multiplier = channel_multiplier
+        self.channel_divisor = channel_divisor
+        self.channel_min = channel_min
+        self.pad_type = pad_type
+        self.act_layer = act_layer
+        self.se_kwargs = se_kwargs
+        self.norm_layer = norm_layer
+        self.norm_kwargs = norm_kwargs
+        self.drop_connect_rate = drop_connect_rate
+        # updated during build
+        self.in_chs = None
+        self.block_idx = 0
+        self.block_count = 0
+    def _round_channels(self, chs):
+        return round_channels(chs, self.channel_multiplier, self.channel_divisor, self.channel_min)
+    def _make_block(self, ba):
+        bt = ba.pop('block_type')
+        ba['in_chs'] = self.in_chs
+        ba['out_chs'] = self._round_channels(ba['out_chs'])
+        if 'fake_in_chs' in ba and ba['fake_in_chs']:
+            # FIXME this is a hack to work around mismatch in origin impl input filters for EdgeTPU
+            ba['fake_in_chs'] = self._round_channels(ba['fake_in_chs'])
+        ba['norm_layer'] = self.norm_layer
+        ba['norm_kwargs'] = self.norm_kwargs
+        ba['pad_type'] = self.pad_type
+        # block act fn overrides the model default
+        ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
+        assert ba['act_layer'] is not None
+        if bt == 'ir':
+            ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count
+            ba['se_kwargs'] = self.se_kwargs
+            if ba.get('num_experts', 0) > 0:
+                block = CondConvResidual(**ba)
+            else:
+                block = InvertedResidual(**ba)
+        elif bt == 'ds' or bt == 'dsa':
+            ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count
+            ba['se_kwargs'] = self.se_kwargs
+            block = DepthwiseSeparableConv(**ba)
+        elif bt == 'er':
+            ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count
+            ba['se_kwargs'] = self.se_kwargs
+            block = EdgeResidual(**ba)
+        elif bt == 'cn':
+            block = ConvBnAct(**ba)
+        else:
+            assert False, 'Uknkown block type (%s) while building model.' % bt
+        self.in_chs = ba['out_chs']  # update in_chs for arg of next block
+        return block
+    def _make_stack(self, stack_args):
+        blocks = []
+        # each stack (stage) contains a list of block arguments
+        for i, ba in enumerate(stack_args):
+            if i >= 1:
+                # only the first block in any stack can have a stride > 1
+                ba['stride'] = 1
+            block = self._make_block(ba)
+            blocks.append(block)
+            self.block_idx += 1  # incr global idx (across all stacks)
+        return nn.Sequential(*blocks)
+    def __call__(self, in_chs, block_args):
+        """ Build the blocks
+        Args:
+            in_chs: Number of input-channels passed to first block
+            block_args: A list of lists, outer list defines stages, inner
+                list contains strings defining block configuration(s)
+        Return:
+             List of block stacks (each stack wrapped in nn.Sequential)
+        """
+        self.in_chs = in_chs
+        self.block_count = sum([len(x) for x in block_args])
+        self.block_idx = 0
+        blocks = []
+        # outer list of block_args defines the stacks ('stages' by some conventions)
+        for stack_idx, stack in enumerate(block_args):
+            assert isinstance(stack, list)
+            stack = self._make_stack(stack)
+            blocks.append(stack)
+        return blocks
+def _parse_ksize(ss):
+    if ss.isdigit():
+        return int(ss)
+    else:
+        return [int(k) for k in ss.split('.')]
+def _decode_block_str(block_str):
+    """ Decode block definition string
+    Gets a list of block arg (dicts) through a string notation of arguments.
+    E.g. ir_r2_k3_s2_e1_i32_o16_se0.25_noskip
+    All args can exist in any order with the exception of the leading string which
+    is assumed to indicate the block type.
+    leading string - block type (
+      ir = InvertedResidual, ds = DepthwiseSep, dsa = DeptwhiseSep with pw act, cn = ConvBnAct)
+    r - number of repeat blocks,
+    k - kernel size,
+    s - strides (1-9),
+    e - expansion ratio,
+    c - output channels,
+    se - squeeze/excitation ratio
+    n - activation fn ('re', 'r6', 'hs', or 'sw')
+    Args:
+        block_str: a string representation of block arguments.
+    Returns:
+        A list of block args (dicts)
+    Raises:
+        ValueError: if the string def not properly specified (TODO)
+    """
+    assert isinstance(block_str, str)
+    ops = block_str.split('_')
+    block_type = ops[0]  # take the block type off the front
+    ops = ops[1:]
+    options = {}
+    noskip = False
+    for op in ops:
+        # string options being checked on individual basis, combine if they grow
+        if op == 'noskip':
+            noskip = True
+        elif op.startswith('n'):
+            # activation fn
+            key = op[0]
+            v = op[1:]
+            if v == 're':
+                value = get_act_layer('relu')
+            elif v == 'r6':
+                value = get_act_layer('relu6')
+            elif v == 'hs':
+                value = get_act_layer('hard_swish')
+            elif v == 'sw':
+                value = get_act_layer('swish')
+            else:
+                continue
+            options[key] = value
+        else:
+            # all numeric options
+            splits = re.split(r'(\d.*)', op)
+            if len(splits) >= 2:
+                key, value = splits[:2]
+                options[key] = value
+    # if act_layer is None, the model default (passed to model init) will be used
+    act_layer = options['n'] if 'n' in options else None
+    exp_kernel_size = _parse_ksize(options['a']) if 'a' in options else 1
+    pw_kernel_size = _parse_ksize(options['p']) if 'p' in options else 1
+    fake_in_chs = int(options['fc']) if 'fc' in options else 0  # FIXME hack to deal with in_chs issue in TPU def
+    num_repeat = int(options['r'])
+    # each type of block has different valid arguments, fill accordingly
+    if block_type == 'ir':
+        block_args = dict(
+            block_type=block_type,
+            dw_kernel_size=_parse_ksize(options['k']),
+            exp_kernel_size=exp_kernel_size,
+            pw_kernel_size=pw_kernel_size,
+            out_chs=int(options['c']),
+            exp_ratio=float(options['e']),
+            se_ratio=float(options['se']) if 'se' in options else None,
+            stride=int(options['s']),
+            act_layer=act_layer,
+            noskip=noskip,
+        )
+        if 'cc' in options:
+            block_args['num_experts'] = int(options['cc'])
+    elif block_type == 'ds' or block_type == 'dsa':
+        block_args = dict(
+            block_type=block_type,
+            dw_kernel_size=_parse_ksize(options['k']),
+            pw_kernel_size=pw_kernel_size,
+            out_chs=int(options['c']),
+            se_ratio=float(options['se']) if 'se' in options else None,
+            stride=int(options['s']),
+            act_layer=act_layer,
+            pw_act=block_type == 'dsa',
+            noskip=block_type == 'dsa' or noskip,
+        )
+    elif block_type == 'er':
+        block_args = dict(
+            block_type=block_type,
+            exp_kernel_size=_parse_ksize(options['k']),
+            pw_kernel_size=pw_kernel_size,
+            out_chs=int(options['c']),
+            exp_ratio=float(options['e']),
+            fake_in_chs=fake_in_chs,
+            se_ratio=float(options['se']) if 'se' in options else None,
+            stride=int(options['s']),
+            act_layer=act_layer,
+            noskip=noskip,
+        )
+    elif block_type == 'cn':
+        block_args = dict(
+            block_type=block_type,
+            kernel_size=int(options['k']),
+            out_chs=int(options['c']),
+            stride=int(options['s']),
+            act_layer=act_layer,
+        )
+    else:
+        assert False, 'Unknown block type (%s)' % block_type
+    return block_args, num_repeat
+def _scale_stage_depth(stack_args, repeats, depth_multiplier=1.0, depth_trunc='ceil'):
+    """ Per-stage depth scaling
+    Scales the block repeats in each stage. This depth scaling impl maintains
+    compatibility with the EfficientNet scaling method, while allowing sensible
+    scaling for other models that may have multiple block arg definitions in each stage.
+    """
+    # We scale the total repeat count for each stage, there may be multiple
+    # block arg defs per stage so we need to sum.
+    num_repeat = sum(repeats)
+    if depth_trunc == 'round':
+        # Truncating to int by rounding allows stages with few repeats to remain
+        # proportionally smaller for longer. This is a good choice when stage definitions
+        # include single repeat stages that we'd prefer to keep that way as long as possible
+        num_repeat_scaled = max(1, round(num_repeat * depth_multiplier))
+    else:
+        # The default for EfficientNet truncates repeats to int via 'ceil'.
+        # Any multiplier > 1.0 will result in an increased depth for every stage.
+        num_repeat_scaled = int(math.ceil(num_repeat * depth_multiplier))
+    # Proportionally distribute repeat count scaling to each block definition in the stage.
+    # Allocation is done in reverse as it results in the first block being less likely to be scaled.
+    # The first block makes less sense to repeat in most of the arch definitions.
+    repeats_scaled = []
+    for r in repeats[::-1]:
+        rs = max(1, round((r / num_repeat * num_repeat_scaled)))
+        repeats_scaled.append(rs)
+        num_repeat -= r
+        num_repeat_scaled -= rs
+    repeats_scaled = repeats_scaled[::-1]
+    # Apply the calculated scaling to each block arg in the stage
+    sa_scaled = []
+    for ba, rep in zip(stack_args, repeats_scaled):
+        sa_scaled.extend([deepcopy(ba) for _ in range(rep)])
+    return sa_scaled
+def decode_arch_def(arch_def, depth_multiplier=1.0, depth_trunc='ceil', experts_multiplier=1, fix_first_last=False):
+    arch_args = []
+    for stack_idx, block_strings in enumerate(arch_def):
+        assert isinstance(block_strings, list)
+        stack_args = []
+        repeats = []
+        for block_str in block_strings:
+            assert isinstance(block_str, str)
+            ba, rep = _decode_block_str(block_str)
+            if ba.get('num_experts', 0) > 0 and experts_multiplier > 1:
+                ba['num_experts'] *= experts_multiplier
+            stack_args.append(ba)
+            repeats.append(rep)
+        if fix_first_last and (stack_idx == 0 or stack_idx == len(arch_def) - 1):
+            arch_args.append(_scale_stage_depth(stack_args, repeats, 1.0, depth_trunc))
+        else:
+            arch_args.append(_scale_stage_depth(stack_args, repeats, depth_multiplier, depth_trunc))
+    return arch_args
+def initialize_weight_goog(m, n='', fix_group_fanout=True):
+    # weight init as per Tensorflow Official impl
+    # https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py
+    if isinstance(m, CondConv2d):
+        fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+        if fix_group_fanout:
+            fan_out //= m.groups
+        init_weight_fn = get_condconv_initializer(
+            lambda w: w.data.normal_(0, math.sqrt(2.0 / fan_out)), m.num_experts, m.weight_shape)
+        init_weight_fn(m.weight)
+        if m.bias is not None:
+            m.bias.data.zero_()
+    elif isinstance(m, nn.Conv2d):
+        fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+        if fix_group_fanout:
+            fan_out //= m.groups
+        m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+        if m.bias is not None:
+            m.bias.data.zero_()
+    elif isinstance(m, nn.BatchNorm2d):
+        m.weight.data.fill_(1.0)
+        m.bias.data.zero_()
+    elif isinstance(m, nn.Linear):
+        fan_out = m.weight.size(0)  # fan-out
+        fan_in = 0
+        if 'routing_fn' in n:
+            fan_in = m.weight.size(1)
+        init_range = 1.0 / math.sqrt(fan_in + fan_out)
+        m.weight.data.uniform_(-init_range, init_range)
+        m.bias.data.zero_()
+def initialize_weight_default(m, n=''):
+    if isinstance(m, CondConv2d):
+        init_fn = get_condconv_initializer(partial(
+            nn.init.kaiming_normal_, mode='fan_out', nonlinearity='relu'), m.num_experts, m.weight_shape)
+        init_fn(m.weight)
+    elif isinstance(m, nn.Conv2d):
+        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+    elif isinstance(m, nn.BatchNorm2d):
+        m.weight.data.fill_(1.0)
+        m.bias.data.zero_()
+    elif isinstance(m, nn.Linear):
+        nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='linear')

geffnet/gen_efficientnet.py ADDED Viewed

	@@ -0,0 +1,1450 @@

+""" Generic Efficient Networks
+A generic MobileNet class with building blocks to support a variety of models:
+* EfficientNet (B0-B8, L2 + Tensorflow pretrained AutoAug/RandAug/AdvProp/NoisyStudent ports)
+  - EfficientNet: Rethinking Model Scaling for CNNs - https://arxiv.org/abs/1905.11946
+  - CondConv: Conditionally Parameterized Convolutions for Efficient Inference - https://arxiv.org/abs/1904.04971
+  - Adversarial Examples Improve Image Recognition - https://arxiv.org/abs/1911.09665
+  - Self-training with Noisy Student improves ImageNet classification - https://arxiv.org/abs/1911.04252
+* EfficientNet-Lite
+* MixNet (Small, Medium, and Large)
+  - MixConv: Mixed Depthwise Convolutional Kernels - https://arxiv.org/abs/1907.09595
+* MNasNet B1, A1 (SE), Small
+  - MnasNet: Platform-Aware Neural Architecture Search for Mobile - https://arxiv.org/abs/1807.11626
+* FBNet-C
+  - FBNet: Hardware-Aware Efficient ConvNet Design via Differentiable NAS - https://arxiv.org/abs/1812.03443
+* Single-Path NAS Pixel1
+  - Single-Path NAS: Designing Hardware-Efficient ConvNets - https://arxiv.org/abs/1904.02877
+* And likely more...
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+import torch.nn as nn
+import torch.nn.functional as F
+from .config import layer_config_kwargs, is_scriptable
+from .conv2d_layers import select_conv2d
+from .helpers import load_pretrained
+from .efficientnet_builder import *
+# Public API of this module: a star import of it exposes exactly the names listed here
+# (the GenEfficientNet class plus one name per model variant).
+__all__ = ['GenEfficientNet', 'mnasnet_050', 'mnasnet_075', 'mnasnet_100', 'mnasnet_b1', 'mnasnet_140',
+           'semnasnet_050', 'semnasnet_075', 'semnasnet_100', 'mnasnet_a1', 'semnasnet_140', 'mnasnet_small',
+           'mobilenetv2_100', 'mobilenetv2_140', 'mobilenetv2_110d', 'mobilenetv2_120d',
+           'fbnetc_100', 'spnasnet_100', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2',  'efficientnet_b3',
+           'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_b8',
+           'efficientnet_l2', 'efficientnet_es', 'efficientnet_em', 'efficientnet_el',
+           'efficientnet_cc_b0_4e', 'efficientnet_cc_b0_8e', 'efficientnet_cc_b1_8e',
+           'efficientnet_lite0', 'efficientnet_lite1', 'efficientnet_lite2', 'efficientnet_lite3', 'efficientnet_lite4',
+           'tf_efficientnet_b0', 'tf_efficientnet_b1', 'tf_efficientnet_b2', 'tf_efficientnet_b3',
+           'tf_efficientnet_b4', 'tf_efficientnet_b5', 'tf_efficientnet_b6', 'tf_efficientnet_b7', 'tf_efficientnet_b8',
+           'tf_efficientnet_b0_ap', 'tf_efficientnet_b1_ap', 'tf_efficientnet_b2_ap', 'tf_efficientnet_b3_ap',
+           'tf_efficientnet_b4_ap', 'tf_efficientnet_b5_ap', 'tf_efficientnet_b6_ap', 'tf_efficientnet_b7_ap',
+           'tf_efficientnet_b8_ap', 'tf_efficientnet_b0_ns', 'tf_efficientnet_b1_ns', 'tf_efficientnet_b2_ns',
+           'tf_efficientnet_b3_ns', 'tf_efficientnet_b4_ns', 'tf_efficientnet_b5_ns', 'tf_efficientnet_b6_ns',
+           'tf_efficientnet_b7_ns', 'tf_efficientnet_l2_ns', 'tf_efficientnet_l2_ns_475',
+           'tf_efficientnet_es', 'tf_efficientnet_em', 'tf_efficientnet_el',
+           'tf_efficientnet_cc_b0_4e', 'tf_efficientnet_cc_b0_8e', 'tf_efficientnet_cc_b1_8e',
+           'tf_efficientnet_lite0', 'tf_efficientnet_lite1', 'tf_efficientnet_lite2', 'tf_efficientnet_lite3',
+           'tf_efficientnet_lite4',
+           'mixnet_s', 'mixnet_m', 'mixnet_l', 'mixnet_xl', 'tf_mixnet_s', 'tf_mixnet_m', 'tf_mixnet_l']
+# Checkpoint URL for each model variant, keyed by variant name.
+# `_create_model` indexes this table with the variant name when `pretrained` is requested.
+# A value of None means no checkpoint URL is listed for that variant.
+model_urls = {
+    'mnasnet_050': None,
+    'mnasnet_075': None,
+    'mnasnet_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mnasnet_b1-74cb7081.pth',
+    'mnasnet_140': None,
+    'mnasnet_small': None,
+    'semnasnet_050': None,
+    'semnasnet_075': None,
+    'semnasnet_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mnasnet_a1-d9418771.pth',
+    'semnasnet_140': None,
+    'mobilenetv2_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_100_ra-b33bc2c4.pth',
+    'mobilenetv2_110d':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_110d_ra-77090ade.pth',
+    'mobilenetv2_120d':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_120d_ra-5987e2ed.pth',
+    'mobilenetv2_140':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_140_ra-21a4e913.pth',
+    'fbnetc_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetc_100-c345b898.pth',
+    'spnasnet_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/spnasnet_100-048bc3f4.pth',
+    'efficientnet_b0':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b0_ra-3dd342df.pth',
+    'efficientnet_b1':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b1-533bc792.pth',
+    'efficientnet_b2':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b2_ra-bcdf34b7.pth',
+    'efficientnet_b3':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b3_ra2-cf984f9c.pth',
+    'efficientnet_b4': None,
+    'efficientnet_b5': None,
+    'efficientnet_b6': None,
+    'efficientnet_b7': None,
+    'efficientnet_b8': None,
+    'efficientnet_l2': None,
+    'efficientnet_es':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_es_ra-f111e99c.pth',
+    'efficientnet_em': None,
+    'efficientnet_el': None,
+    'efficientnet_cc_b0_4e': None,
+    'efficientnet_cc_b0_8e': None,
+    'efficientnet_cc_b1_8e': None,
+    'efficientnet_lite0': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_lite0_ra-37913777.pth',
+    'efficientnet_lite1': None,
+    'efficientnet_lite2': None,
+    'efficientnet_lite3': None,
+    'efficientnet_lite4': None,
+    'tf_efficientnet_b0':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_aa-827b6e33.pth',
+    'tf_efficientnet_b1':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_aa-ea7a6ee0.pth',
+    'tf_efficientnet_b2':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_aa-60c94f97.pth',
+    'tf_efficientnet_b3':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_aa-84b4657e.pth',
+    'tf_efficientnet_b4':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_aa-818f208c.pth',
+    'tf_efficientnet_b5':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ra-9a3e5369.pth',
+    'tf_efficientnet_b6':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_aa-80ba17e4.pth',
+    'tf_efficientnet_b7':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ra-6c08e654.pth',
+    'tf_efficientnet_b8':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ra-572d5dd9.pth',
+    'tf_efficientnet_b0_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ap-f262efe1.pth',
+    'tf_efficientnet_b1_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ap-44ef0a3d.pth',
+    'tf_efficientnet_b2_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ap-2f8e7636.pth',
+    'tf_efficientnet_b3_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ap-aad25bdd.pth',
+    'tf_efficientnet_b4_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ap-dedb23e6.pth',
+    'tf_efficientnet_b5_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ap-9e82fae8.pth',
+    'tf_efficientnet_b6_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ap-4ffb161f.pth',
+    'tf_efficientnet_b7_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ap-ddb28fec.pth',
+    'tf_efficientnet_b8_ap':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth',
+    'tf_efficientnet_b0_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ns-c0e6a31c.pth',
+    'tf_efficientnet_b1_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ns-99dd0c41.pth',
+    'tf_efficientnet_b2_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ns-00306e48.pth',
+    'tf_efficientnet_b3_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ns-9d44bf68.pth',
+    'tf_efficientnet_b4_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ns-d6313a46.pth',
+    'tf_efficientnet_b5_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ns-6f26d0cf.pth',
+    'tf_efficientnet_b6_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ns-51548356.pth',
+    'tf_efficientnet_b7_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth',
+    'tf_efficientnet_l2_ns_475':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns_475-bebbd00a.pth',
+    'tf_efficientnet_l2_ns':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns-df73bb44.pth',
+    'tf_efficientnet_es':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_es-ca1afbfe.pth',
+    'tf_efficientnet_em':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_em-e78cfe58.pth',
+    'tf_efficientnet_el':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_el-5143854e.pth',
+    'tf_efficientnet_cc_b0_4e':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b0_4e-4362b6b2.pth',
+    'tf_efficientnet_cc_b0_8e':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b0_8e-66184a25.pth',
+    'tf_efficientnet_cc_b1_8e':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b1_8e-f7c79ae1.pth',
+    'tf_efficientnet_lite0':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite0-0aa007d2.pth',
+    'tf_efficientnet_lite1':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite1-bde8b488.pth',
+    'tf_efficientnet_lite2':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite2-dcccb7df.pth',
+    'tf_efficientnet_lite3':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite3-b733e338.pth',
+    'tf_efficientnet_lite4':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite4-741542c3.pth',
+    'mixnet_s': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_s-a907afbc.pth',
+    'mixnet_m': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_m-4647fc68.pth',
+    'mixnet_l': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_l-5a9a2ed8.pth',
+    'mixnet_xl': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_xl_ra-aac3c00c.pth',
+    'tf_mixnet_s':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_s-89d3354b.pth',
+    'tf_mixnet_m':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_m-0f4d8805.pth',
+    'tf_mixnet_l':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_l-6c92e0c8.pth',
+}
+class GenEfficientNet(nn.Module):
+    """ Generic EfficientNets
+    An implementation of mobile optimized networks that covers:
+      * EfficientNet (B0-B8, L2, CondConv, EdgeTPU)
+      * MixNet (Small, Medium, and Large, XL)
+      * MNASNet A1, B1, and small
+      * FBNet C
+      * Single-Path NAS Pixel1
+    """
+    def __init__(self, block_args, num_classes=1000, in_chans=3, num_features=1280, stem_size=32, fix_stem=False,
+                 channel_multiplier=1.0, channel_divisor=8, channel_min=None,
+                 pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0.,
+                 se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
+                 weight_init='goog'):
+        super(GenEfficientNet, self).__init__()
+        self.drop_rate = drop_rate
+        if not fix_stem:
+            stem_size = round_channels(stem_size, channel_multiplier, channel_divisor, channel_min)
+        self.conv_stem = select_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type)
+        self.bn1 = norm_layer(stem_size, **norm_kwargs)
+        self.act1 = act_layer(inplace=True)
+        in_chs = stem_size
+        builder = EfficientNetBuilder(
+            channel_multiplier, channel_divisor, channel_min,
+            pad_type, act_layer, se_kwargs, norm_layer, norm_kwargs, drop_connect_rate)
+        self.blocks = nn.Sequential(*builder(in_chs, block_args))
+        in_chs = builder.in_chs
+        self.conv_head = select_conv2d(in_chs, num_features, 1, padding=pad_type)
+        self.bn2 = norm_layer(num_features, **norm_kwargs)
+        self.act2 = act_layer(inplace=True)
+        self.global_pool = nn.AdaptiveAvgPool2d(1)
+        self.classifier = nn.Linear(num_features, num_classes)
+        for n, m in self.named_modules():
+            if weight_init == 'goog':
+                initialize_weight_goog(m, n)
+            else:
+                initialize_weight_default(m, n)
+    def features(self, x):
+        x = self.conv_stem(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        x = self.blocks(x)
+        x = self.conv_head(x)
+        x = self.bn2(x)
+        x = self.act2(x)
+        return x
+    def as_sequential(self):
+        layers = [self.conv_stem, self.bn1, self.act1]
+        layers.extend(self.blocks)
+        layers.extend([
+            self.conv_head, self.bn2, self.act2,
+            self.global_pool, nn.Flatten(), nn.Dropout(self.drop_rate), self.classifier])
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        x = self.features(x)
+        x = self.global_pool(x)
+        x = x.flatten(1)
+        if self.drop_rate > 0.:
+            x = F.dropout(x, p=self.drop_rate, training=self.training)
+        return self.classifier(x)
+def _create_model(model_kwargs, variant, pretrained=False):
+    as_sequential = model_kwargs.pop('as_sequential', False)
+    model = GenEfficientNet(**model_kwargs)
+    if pretrained:
+        load_pretrained(model, model_urls[variant])
+    if as_sequential:
+        model = model.as_sequential()
+    return model
+def _gen_mnasnet_a1(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates a mnasnet-a1 model.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
+    Paper: https://arxiv.org/pdf/1807.11626.pdf.
+    Args:
+      channel_multiplier: multiplier to number of channels per layer.
+    """
+    arch_def = [
+        # stage 0, 112x112 in
+        ['ds_r1_k3_s1_e1_c16_noskip'],
+        # stage 1, 112x112 in
+        ['ir_r2_k3_s2_e6_c24'],
+        # stage 2, 56x56 in
+        ['ir_r3_k5_s2_e3_c40_se0.25'],
+        # stage 3, 28x28 in
+        ['ir_r4_k3_s2_e6_c80'],
+        # stage 4, 14x14in
+        ['ir_r2_k3_s1_e6_c112_se0.25'],
+        # stage 5, 14x14in
+        ['ir_r3_k5_s2_e6_c160_se0.25'],
+        # stage 6, 7x7 in
+        ['ir_r1_k3_s1_e6_c320'],
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            stem_size=32,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_mnasnet_b1(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates a mnasnet-b1 model.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
+    Paper: https://arxiv.org/pdf/1807.11626.pdf.
+    Args:
+      channel_multiplier: multiplier to number of channels per layer.
+    """
+    arch_def = [
+        # stage 0, 112x112 in
+        ['ds_r1_k3_s1_c16_noskip'],
+        # stage 1, 112x112 in
+        ['ir_r3_k3_s2_e3_c24'],
+        # stage 2, 56x56 in
+        ['ir_r3_k5_s2_e3_c40'],
+        # stage 3, 28x28 in
+        ['ir_r3_k5_s2_e6_c80'],
+        # stage 4, 14x14in
+        ['ir_r2_k3_s1_e6_c96'],
+        # stage 5, 14x14in
+        ['ir_r4_k5_s2_e6_c192'],
+        # stage 6, 7x7 in
+        ['ir_r1_k3_s1_e6_c320_noskip']
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            stem_size=32,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates a mnasnet-b1 model.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
+    Paper: https://arxiv.org/pdf/1807.11626.pdf.
+    Args:
+      channel_multiplier: multiplier to number of channels per layer.
+    """
+    arch_def = [
+        ['ds_r1_k3_s1_c8'],
+        ['ir_r1_k3_s2_e3_c16'],
+        ['ir_r2_k3_s2_e6_c16'],
+        ['ir_r4_k5_s2_e6_c32_se0.25'],
+        ['ir_r3_k3_s1_e6_c32_se0.25'],
+        ['ir_r3_k5_s2_e6_c88_se0.25'],
+        ['ir_r1_k3_s1_e6_c144']
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            stem_size=8,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_mobilenet_v2(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, fix_stem_head=False, pretrained=False, **kwargs):
+    """ Generate MobileNet-V2 network
+    Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py
+    Paper: https://arxiv.org/abs/1801.04381
+    """
+    arch_def = [
+        ['ds_r1_k3_s1_c16'],
+        ['ir_r2_k3_s2_e6_c24'],
+        ['ir_r3_k3_s2_e6_c32'],
+        ['ir_r4_k3_s2_e6_c64'],
+        ['ir_r3_k3_s1_e6_c96'],
+        ['ir_r3_k3_s2_e6_c160'],
+        ['ir_r1_k3_s1_e6_c320'],
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def, depth_multiplier=depth_multiplier, fix_first_last=fix_stem_head),
+            num_features=1280 if fix_stem_head else round_channels(1280, channel_multiplier, 8, None),
+            stem_size=32,
+            fix_stem=fix_stem_head,
+            channel_multiplier=channel_multiplier,
+            norm_kwargs=resolve_bn_args(kwargs),
+            act_layer=nn.ReLU6,
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_fbnetc(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """ FBNet-C
+        Paper: https://arxiv.org/abs/1812.03443
+        Ref Impl: https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py
+        NOTE: the impl above does not relate to the 'C' variant here, that was derived from paper,
+        it was used to confirm some building block details
+    """
+    arch_def = [
+        ['ir_r1_k3_s1_e1_c16'],
+        ['ir_r1_k3_s2_e6_c24', 'ir_r2_k3_s1_e1_c24'],
+        ['ir_r1_k5_s2_e6_c32', 'ir_r1_k5_s1_e3_c32', 'ir_r1_k5_s1_e6_c32', 'ir_r1_k3_s1_e6_c32'],
+        ['ir_r1_k5_s2_e6_c64', 'ir_r1_k5_s1_e3_c64', 'ir_r2_k5_s1_e6_c64'],
+        ['ir_r3_k5_s1_e6_c112', 'ir_r1_k5_s1_e3_c112'],
+        ['ir_r4_k5_s2_e6_c184'],
+        ['ir_r1_k3_s1_e6_c352'],
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            stem_size=16,
+            num_features=1984,  # paper suggests this, but is not 100% clear
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_spnasnet(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates the Single-Path NAS model from search targeted for Pixel1 phone.
+    Paper: https://arxiv.org/abs/1904.02877
+    Args:
+      channel_multiplier: multiplier to number of channels per layer.
+    """
+    arch_def = [
+        # stage 0, 112x112 in
+        ['ds_r1_k3_s1_c16_noskip'],
+        # stage 1, 112x112 in
+        ['ir_r3_k3_s2_e3_c24'],
+        # stage 2, 56x56 in
+        ['ir_r1_k5_s2_e6_c40', 'ir_r3_k3_s1_e3_c40'],
+        # stage 3, 28x28 in
+        ['ir_r1_k5_s2_e6_c80', 'ir_r3_k3_s1_e3_c80'],
+        # stage 4, 14x14in
+        ['ir_r1_k5_s1_e6_c96', 'ir_r3_k5_s1_e3_c96'],
+        # stage 5, 14x14in
+        ['ir_r4_k5_s2_e6_c192'],
+        # stage 6, 7x7 in
+        ['ir_r1_k3_s1_e6_c320_noskip']
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            stem_size=32,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_efficientnet(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates an EfficientNet model.
+    Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
+    Paper: https://arxiv.org/abs/1905.11946
+    EfficientNet params
+    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
+    'efficientnet-b0': (1.0, 1.0, 224, 0.2),
+    'efficientnet-b1': (1.0, 1.1, 240, 0.2),
+    'efficientnet-b2': (1.1, 1.2, 260, 0.3),
+    'efficientnet-b3': (1.2, 1.4, 300, 0.3),
+    'efficientnet-b4': (1.4, 1.8, 380, 0.4),
+    'efficientnet-b5': (1.6, 2.2, 456, 0.4),
+    'efficientnet-b6': (1.8, 2.6, 528, 0.5),
+    'efficientnet-b7': (2.0, 3.1, 600, 0.5),
+    'efficientnet-b8': (2.2, 3.6, 672, 0.5),
+    Args:
+      channel_multiplier: multiplier to number of channels per layer
+      depth_multiplier: multiplier to number of repeats per stage
+    """
+    arch_def = [
+        ['ds_r1_k3_s1_e1_c16_se0.25'],
+        ['ir_r2_k3_s2_e6_c24_se0.25'],
+        ['ir_r2_k5_s2_e6_c40_se0.25'],
+        ['ir_r3_k3_s2_e6_c80_se0.25'],
+        ['ir_r3_k5_s1_e6_c112_se0.25'],
+        ['ir_r4_k5_s2_e6_c192_se0.25'],
+        ['ir_r1_k3_s1_e6_c320_se0.25'],
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def, depth_multiplier),
+            num_features=round_channels(1280, channel_multiplier, 8, None),
+            stem_size=32,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'swish'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs,
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_efficientnet_edge(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    arch_def = [
+        # NOTE `fc` is present to override a mismatch between stem channels and in chs not
+        # present in other models
+        ['er_r1_k3_s1_e4_c24_fc24_noskip'],
+        ['er_r2_k3_s2_e8_c32'],
+        ['er_r4_k3_s2_e8_c48'],
+        ['ir_r5_k5_s2_e8_c96'],
+        ['ir_r4_k5_s1_e8_c144'],
+        ['ir_r2_k5_s2_e8_c192'],
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def, depth_multiplier),
+            num_features=round_channels(1280, channel_multiplier, 8, None),
+            stem_size=32,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs,
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_efficientnet_condconv(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=1, pretrained=False, **kwargs):
+    """Creates an efficientnet-condconv model."""
+    arch_def = [
+      ['ds_r1_k3_s1_e1_c16_se0.25'],
+      ['ir_r2_k3_s2_e6_c24_se0.25'],
+      ['ir_r2_k5_s2_e6_c40_se0.25'],
+      ['ir_r3_k3_s2_e6_c80_se0.25'],
+      ['ir_r3_k5_s1_e6_c112_se0.25_cc4'],
+      ['ir_r4_k5_s2_e6_c192_se0.25_cc4'],
+      ['ir_r1_k3_s1_e6_c320_se0.25_cc4'],
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def, depth_multiplier, experts_multiplier=experts_multiplier),
+            num_features=round_channels(1280, channel_multiplier, 8, None),
+            stem_size=32,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'swish'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs,
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates an EfficientNet-Lite model.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite
+    Paper: https://arxiv.org/abs/1905.11946
+    EfficientNet params
+    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
+      'efficientnet-lite0': (1.0, 1.0, 224, 0.2),
+      'efficientnet-lite1': (1.0, 1.1, 240, 0.2),
+      'efficientnet-lite2': (1.1, 1.2, 260, 0.3),
+      'efficientnet-lite3': (1.2, 1.4, 280, 0.3),
+      'efficientnet-lite4': (1.4, 1.8, 300, 0.3),
+    Args:
+      channel_multiplier: multiplier to number of channels per layer
+      depth_multiplier: multiplier to number of repeats per stage
+    """
+    arch_def = [
+        ['ds_r1_k3_s1_e1_c16'],
+        ['ir_r2_k3_s2_e6_c24'],
+        ['ir_r2_k5_s2_e6_c40'],
+        ['ir_r3_k3_s2_e6_c80'],
+        ['ir_r3_k5_s1_e6_c112'],
+        ['ir_r4_k5_s2_e6_c192'],
+        ['ir_r1_k3_s1_e6_c320'],
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def, depth_multiplier, fix_first_last=True),
+            num_features=1280,
+            stem_size=32,
+            fix_stem=True,
+            channel_multiplier=channel_multiplier,
+            act_layer=nn.ReLU6,
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs,
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_mixnet_s(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates a MixNet Small model.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet
+    Paper: https://arxiv.org/abs/1907.09595
+    """
+    arch_def = [
+        # stage 0, 112x112 in
+        ['ds_r1_k3_s1_e1_c16'],  # relu
+        # stage 1, 112x112 in
+        ['ir_r1_k3_a1.1_p1.1_s2_e6_c24', 'ir_r1_k3_a1.1_p1.1_s1_e3_c24'],  # relu
+        # stage 2, 56x56 in
+        ['ir_r1_k3.5.7_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'],  # swish
+        # stage 3, 28x28 in
+        ['ir_r1_k3.5.7_p1.1_s2_e6_c80_se0.25_nsw', 'ir_r2_k3.5_p1.1_s1_e6_c80_se0.25_nsw'],  # swish
+        # stage 4, 14x14in
+        ['ir_r1_k3.5.7_a1.1_p1.1_s1_e6_c120_se0.5_nsw', 'ir_r2_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'],  # swish
+        # stage 5, 14x14in
+        ['ir_r1_k3.5.7.9.11_s2_e6_c200_se0.5_nsw', 'ir_r2_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'],  # swish
+        # 7x7
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            num_features=1536,
+            stem_size=16,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_mixnet_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates a MixNet Medium-Large model.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet
+    Paper: https://arxiv.org/abs/1907.09595
+    """
+    arch_def = [
+        # stage 0, 112x112 in
+        ['ds_r1_k3_s1_e1_c24'],  # relu
+        # stage 1, 112x112 in
+        ['ir_r1_k3.5.7_a1.1_p1.1_s2_e6_c32', 'ir_r1_k3_a1.1_p1.1_s1_e3_c32'],  # relu
+        # stage 2, 56x56 in
+        ['ir_r1_k3.5.7.9_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'],  # swish
+        # stage 3, 28x28 in
+        ['ir_r1_k3.5.7_s2_e6_c80_se0.25_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e6_c80_se0.25_nsw'],  # swish
+        # stage 4, 14x14in
+        ['ir_r1_k3_s1_e6_c120_se0.5_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'],  # swish
+        # stage 5, 14x14in
+        ['ir_r1_k3.5.7.9_s2_e6_c200_se0.5_nsw', 'ir_r3_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'],  # swish
+        # 7x7
+    ]
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def, depth_multiplier, depth_trunc='round'),
+            num_features=1536,
+            stem_size=24,
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'relu'),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def mnasnet_050(pretrained=False, **kwargs):
+    """ MNASNet B1, depth multiplier of 0.5. """
+    model = _gen_mnasnet_b1('mnasnet_050', 0.5, pretrained=pretrained, **kwargs)
+    return model
+def mnasnet_075(pretrained=False, **kwargs):
+    """ MNASNet B1, depth multiplier of 0.75. """
+    model = _gen_mnasnet_b1('mnasnet_075', 0.75, pretrained=pretrained, **kwargs)
+    return model
+def mnasnet_100(pretrained=False, **kwargs):
+    """ MNASNet B1, depth multiplier of 1.0. """
+    model = _gen_mnasnet_b1('mnasnet_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mnasnet_b1(pretrained=False, **kwargs):
+    """ MNASNet B1, depth multiplier of 1.0. """
+    return mnasnet_100(pretrained, **kwargs)
+def mnasnet_140(pretrained=False, **kwargs):
+    """ MNASNet B1,  depth multiplier of 1.4 """
+    model = _gen_mnasnet_b1('mnasnet_140', 1.4, pretrained=pretrained, **kwargs)
+    return model
+def semnasnet_050(pretrained=False, **kwargs):
+    """ MNASNet A1 (w/ SE), depth multiplier of 0.5 """
+    model = _gen_mnasnet_a1('semnasnet_050', 0.5, pretrained=pretrained, **kwargs)
+    return model
+def semnasnet_075(pretrained=False, **kwargs):
+    """ MNASNet A1 (w/ SE),  depth multiplier of 0.75. """
+    model = _gen_mnasnet_a1('semnasnet_075', 0.75, pretrained=pretrained, **kwargs)
+    return model
+def semnasnet_100(pretrained=False, **kwargs):
+    """ MNASNet A1 (w/ SE), depth multiplier of 1.0. """
+    model = _gen_mnasnet_a1('semnasnet_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mnasnet_a1(pretrained=False, **kwargs):
+    """ MNASNet A1 (w/ SE), depth multiplier of 1.0. """
+    return semnasnet_100(pretrained, **kwargs)
+def semnasnet_140(pretrained=False, **kwargs):
+    """ MNASNet A1 (w/ SE), depth multiplier of 1.4. """
+    model = _gen_mnasnet_a1('semnasnet_140', 1.4, pretrained=pretrained, **kwargs)
+    return model
+def mnasnet_small(pretrained=False, **kwargs):
+    """ MNASNet Small,  depth multiplier of 1.0. """
+    model = _gen_mnasnet_small('mnasnet_small', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv2_100(pretrained=False, **kwargs):
+    """ MobileNet V2 w/ 1.0 channel multiplier """
+    model = _gen_mobilenet_v2('mobilenetv2_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv2_140(pretrained=False, **kwargs):
+    """ MobileNet V2 w/ 1.4 channel multiplier """
+    model = _gen_mobilenet_v2('mobilenetv2_140', 1.4, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv2_110d(pretrained=False, **kwargs):
+    """ MobileNet V2 w/ 1.1 channel, 1.2 depth multipliers"""
+    model = _gen_mobilenet_v2(
+        'mobilenetv2_110d', 1.1, depth_multiplier=1.2, fix_stem_head=True, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv2_120d(pretrained=False, **kwargs):
+    """ MobileNet V2 w/ 1.2 channel, 1.4 depth multipliers """
+    model = _gen_mobilenet_v2(
+        'mobilenetv2_120d', 1.2, depth_multiplier=1.4, fix_stem_head=True, pretrained=pretrained, **kwargs)
+    return model
+def fbnetc_100(pretrained=False, **kwargs):
+    """ FBNet-C """
+    if pretrained:
+        # pretrained model trained with non-default BN epsilon
+        kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    model = _gen_fbnetc('fbnetc_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def spnasnet_100(pretrained=False, **kwargs):
+    """ Single-Path NAS Pixel1"""
+    model = _gen_spnasnet('spnasnet_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def efficientnet_b0(pretrained=False, **kwargs):
+    """ EfficientNet-B0 """
+    # NOTE for train set drop_rate=0.2, drop_connect_rate=0.2
+    model = _gen_efficientnet(
+        'efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
+    return model
+def efficientnet_b1(pretrained=False, **kwargs):
+    """ EfficientNet-B1 """
+    # NOTE for train set drop_rate=0.2, drop_connect_rate=0.2
+    model = _gen_efficientnet(
+        'efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
+    return model
+def efficientnet_b2(pretrained=False, **kwargs):
+    """ EfficientNet-B2 """
+    # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2
+    model = _gen_efficientnet(
+        'efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
+    return model
+def efficientnet_b3(pretrained=False, **kwargs):
+    """ EfficientNet-B3 """
+    # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2
+    model = _gen_efficientnet(
+        'efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
+    return model
+def efficientnet_b4(pretrained=False, **kwargs):
+    """ EfficientNet-B4 """
+    # NOTE for train set drop_rate=0.4, drop_connect_rate=0.2
+    model = _gen_efficientnet(
+        'efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
+    return model
+def efficientnet_b5(pretrained=False, **kwargs):
+    """EfficientNet-B5: channel multiplier 1.6, depth multiplier 2.2."""
+    # Training hint: drop_rate=0.4, drop_connect_rate=0.2.
+    return _gen_efficientnet(
+        'efficientnet_b5', pretrained=pretrained, channel_multiplier=1.6, depth_multiplier=2.2, **kwargs)
+def efficientnet_b6(pretrained=False, **kwargs):
+    """EfficientNet-B6: channel multiplier 1.8, depth multiplier 2.6."""
+    # Training hint: drop_rate=0.5, drop_connect_rate=0.2.
+    return _gen_efficientnet(
+        'efficientnet_b6', pretrained=pretrained, channel_multiplier=1.8, depth_multiplier=2.6, **kwargs)
+def efficientnet_b7(pretrained=False, **kwargs):
+    """EfficientNet-B7: channel multiplier 2.0, depth multiplier 3.1."""
+    # Training hint: drop_rate=0.5, drop_connect_rate=0.2.
+    return _gen_efficientnet(
+        'efficientnet_b7', pretrained=pretrained, channel_multiplier=2.0, depth_multiplier=3.1, **kwargs)
+def efficientnet_b8(pretrained=False, **kwargs):
+    """EfficientNet-B8: channel multiplier 2.2, depth multiplier 3.6."""
+    # Training hint: drop_rate=0.5, drop_connect_rate=0.2.
+    return _gen_efficientnet(
+        'efficientnet_b8', pretrained=pretrained, channel_multiplier=2.2, depth_multiplier=3.6, **kwargs)
+def efficientnet_l2(pretrained=False, **kwargs):
+    """EfficientNet-L2: channel multiplier 4.3, depth multiplier 5.3."""
+    # Training hint: drop_rate should be 0.5.
+    return _gen_efficientnet(
+        'efficientnet_l2', pretrained=pretrained, channel_multiplier=4.3, depth_multiplier=5.3, **kwargs)
+def efficientnet_es(pretrained=False, **kwargs):
+    """EfficientNet-Edge Small: channel multiplier 1.0, depth multiplier 1.0."""
+    return _gen_efficientnet_edge(
+        'efficientnet_es', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def efficientnet_em(pretrained=False, **kwargs):
+    """EfficientNet-Edge Medium: channel multiplier 1.0, depth multiplier 1.1."""
+    return _gen_efficientnet_edge(
+        'efficientnet_em', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.1, **kwargs)
+def efficientnet_el(pretrained=False, **kwargs):
+    """EfficientNet-Edge Large: channel multiplier 1.2, depth multiplier 1.4."""
+    return _gen_efficientnet_edge(
+        'efficientnet_el', pretrained=pretrained, channel_multiplier=1.2, depth_multiplier=1.4, **kwargs)
+def efficientnet_cc_b0_4e(pretrained=False, **kwargs):
+    """ EfficientNet-CondConv-B0 w/ 4 Experts
+    Unlike the `_8e` variants, no `experts_multiplier` is passed, so the generator's default
+    expert count is used. Extra keyword arguments are forwarded to `_gen_efficientnet_condconv`.
+    """
+    # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2
+    model = _gen_efficientnet_condconv(
+        'efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
+    return model
+def efficientnet_cc_b0_8e(pretrained=False, **kwargs):
+    """EfficientNet-CondConv-B0 with 8 experts (experts_multiplier=2)."""
+    # Training hint: drop_rate=0.25, drop_connect_rate=0.2.
+    return _gen_efficientnet_condconv(
+        'efficientnet_cc_b0_8e', pretrained=pretrained,
+        channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2, **kwargs)
+def efficientnet_cc_b1_8e(pretrained=False, **kwargs):
+    """EfficientNet-CondConv-B1 with 8 experts (experts_multiplier=2)."""
+    # Training hint: drop_rate=0.25, drop_connect_rate=0.2.
+    return _gen_efficientnet_condconv(
+        'efficientnet_cc_b1_8e', pretrained=pretrained,
+        channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2, **kwargs)
+def efficientnet_lite0(pretrained=False, **kwargs):
+    """EfficientNet-Lite0: channel multiplier 1.0, depth multiplier 1.0."""
+    return _gen_efficientnet_lite(
+        'efficientnet_lite0', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def efficientnet_lite1(pretrained=False, **kwargs):
+    """EfficientNet-Lite1: channel multiplier 1.0, depth multiplier 1.1."""
+    return _gen_efficientnet_lite(
+        'efficientnet_lite1', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.1, **kwargs)
+def efficientnet_lite2(pretrained=False, **kwargs):
+    """EfficientNet-Lite2: channel multiplier 1.1, depth multiplier 1.2."""
+    return _gen_efficientnet_lite(
+        'efficientnet_lite2', pretrained=pretrained, channel_multiplier=1.1, depth_multiplier=1.2, **kwargs)
+def efficientnet_lite3(pretrained=False, **kwargs):
+    """EfficientNet-Lite3: channel multiplier 1.2, depth multiplier 1.4."""
+    return _gen_efficientnet_lite(
+        'efficientnet_lite3', pretrained=pretrained, channel_multiplier=1.2, depth_multiplier=1.4, **kwargs)
+def efficientnet_lite4(pretrained=False, **kwargs):
+    """EfficientNet-Lite4: channel multiplier 1.4, depth multiplier 1.8."""
+    return _gen_efficientnet_lite(
+        'efficientnet_lite4', pretrained=pretrained, channel_multiplier=1.4, depth_multiplier=1.8, **kwargs)
+def tf_efficientnet_b0(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B0 (AutoAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b0', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def tf_efficientnet_b1(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B1 (AutoAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b1', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.1, **kwargs)
+def tf_efficientnet_b2(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B2 (AutoAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b2', pretrained=pretrained, channel_multiplier=1.1, depth_multiplier=1.2, **kwargs)
+def tf_efficientnet_b3(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B3 (AutoAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b3', pretrained=pretrained, channel_multiplier=1.2, depth_multiplier=1.4, **kwargs)
+def tf_efficientnet_b4(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B4 (AutoAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b4', pretrained=pretrained, channel_multiplier=1.4, depth_multiplier=1.8, **kwargs)
+def tf_efficientnet_b5(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B5 (RandAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b5', pretrained=pretrained, channel_multiplier=1.6, depth_multiplier=2.2, **kwargs)
+def tf_efficientnet_b6(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B6 (AutoAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b6', pretrained=pretrained, channel_multiplier=1.8, depth_multiplier=2.6, **kwargs)
+def tf_efficientnet_b7(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B7 (RandAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b7', pretrained=pretrained, channel_multiplier=2.0, depth_multiplier=3.1, **kwargs)
+def tf_efficientnet_b8(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B8 (RandAug)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b8', pretrained=pretrained, channel_multiplier=2.2, depth_multiplier=3.6, **kwargs)
+def tf_efficientnet_b0_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B0, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b0_ap', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def tf_efficientnet_b1_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B1, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b1_ap', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.1, **kwargs)
+def tf_efficientnet_b2_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B2, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b2_ap', pretrained=pretrained, channel_multiplier=1.1, depth_multiplier=1.2, **kwargs)
+def tf_efficientnet_b3_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B3, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b3_ap', pretrained=pretrained, channel_multiplier=1.2, depth_multiplier=1.4, **kwargs)
+def tf_efficientnet_b4_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B4, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b4_ap', pretrained=pretrained, channel_multiplier=1.4, depth_multiplier=1.8, **kwargs)
+def tf_efficientnet_b5_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B5, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b5_ap', pretrained=pretrained, channel_multiplier=1.6, depth_multiplier=2.2, **kwargs)
+def tf_efficientnet_b6_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B6, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Training hint: drop_rate should be 0.5.
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b6_ap', pretrained=pretrained, channel_multiplier=1.8, depth_multiplier=2.6, **kwargs)
+def tf_efficientnet_b7_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B7, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Training hint: drop_rate should be 0.5.
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b7_ap', pretrained=pretrained, channel_multiplier=2.0, depth_multiplier=3.1, **kwargs)
+def tf_efficientnet_b8_ap(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B8, AdvProp variant.
+    Reference: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
+    """
+    # Training hint: drop_rate should be 0.5.
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b8_ap', pretrained=pretrained, channel_multiplier=2.2, depth_multiplier=3.6, **kwargs)
+def tf_efficientnet_b0_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B0, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b0_ns', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def tf_efficientnet_b1_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B1, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b1_ns', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.1, **kwargs)
+def tf_efficientnet_b2_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B2, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b2_ns', pretrained=pretrained, channel_multiplier=1.1, depth_multiplier=1.2, **kwargs)
+def tf_efficientnet_b3_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B3, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b3_ns', pretrained=pretrained, channel_multiplier=1.2, depth_multiplier=1.4, **kwargs)
+def tf_efficientnet_b4_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B4, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b4_ns', pretrained=pretrained, channel_multiplier=1.4, depth_multiplier=1.8, **kwargs)
+def tf_efficientnet_b5_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B5, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b5_ns', pretrained=pretrained, channel_multiplier=1.6, depth_multiplier=2.2, **kwargs)
+def tf_efficientnet_b6_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B6, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Training hint: drop_rate should be 0.5.
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b6_ns', pretrained=pretrained, channel_multiplier=1.8, depth_multiplier=2.6, **kwargs)
+def tf_efficientnet_b7_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-B7, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Training hint: drop_rate should be 0.5.
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_b7_ns', pretrained=pretrained, channel_multiplier=2.0, depth_multiplier=3.1, **kwargs)
+def tf_efficientnet_l2_ns_475(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-L2, NoisyStudent variant @ 475x475.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Training hint: drop_rate should be 0.5.
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_l2_ns_475', pretrained=pretrained, channel_multiplier=4.3, depth_multiplier=5.3, **kwargs)
+def tf_efficientnet_l2_ns(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-L2, NoisyStudent variant.
+    Reference: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
+    """
+    # Training hint: drop_rate should be 0.5.
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet(
+        'tf_efficientnet_l2_ns', pretrained=pretrained, channel_multiplier=4.3, depth_multiplier=5.3, **kwargs)
+def tf_efficientnet_es(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Edge Small."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_edge(
+        'tf_efficientnet_es', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def tf_efficientnet_em(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Edge Medium."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_edge(
+        'tf_efficientnet_em', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.1, **kwargs)
+def tf_efficientnet_el(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Edge Large."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_edge(
+        'tf_efficientnet_el', pretrained=pretrained, channel_multiplier=1.2, depth_multiplier=1.4, **kwargs)
+def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-CondConv-B0 with 4 experts."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_condconv(
+        'tf_efficientnet_cc_b0_4e', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-CondConv-B0 with 8 experts (experts_multiplier=2)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_condconv(
+        'tf_efficientnet_cc_b0_8e', pretrained=pretrained,
+        channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2, **kwargs)
+def tf_efficientnet_cc_b1_8e(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-CondConv-B1 with 8 experts (experts_multiplier=2)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_condconv(
+        'tf_efficientnet_cc_b1_8e', pretrained=pretrained,
+        channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2, **kwargs)
+def tf_efficientnet_lite0(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Lite0."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_lite(
+        'tf_efficientnet_lite0', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.0, **kwargs)
+def tf_efficientnet_lite1(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Lite1."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_lite(
+        'tf_efficientnet_lite1', pretrained=pretrained, channel_multiplier=1.0, depth_multiplier=1.1, **kwargs)
+def tf_efficientnet_lite2(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Lite2."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_lite(
+        'tf_efficientnet_lite2', pretrained=pretrained, channel_multiplier=1.1, depth_multiplier=1.2, **kwargs)
+def tf_efficientnet_lite3(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Lite3."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_lite(
+        'tf_efficientnet_lite3', pretrained=pretrained, channel_multiplier=1.2, depth_multiplier=1.4, **kwargs)
+def tf_efficientnet_lite4(pretrained=False, **kwargs):
+    """Tensorflow-compatible EfficientNet-Lite4."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_efficientnet_lite(
+        'tf_efficientnet_lite4', pretrained=pretrained, channel_multiplier=1.4, depth_multiplier=1.8, **kwargs)
+def mixnet_s(pretrained=False, **kwargs):
+    """MixNet Small, produced by `_gen_mixnet_s` with channel multiplier 1.0."""
+    # Training hint: drop_rate=0.2.
+    return _gen_mixnet_s('mixnet_s', pretrained=pretrained, channel_multiplier=1.0, **kwargs)
+def mixnet_m(pretrained=False, **kwargs):
+    """MixNet Medium, produced by `_gen_mixnet_m` with channel multiplier 1.0."""
+    # Training hint: drop_rate=0.25.
+    return _gen_mixnet_m('mixnet_m', pretrained=pretrained, channel_multiplier=1.0, **kwargs)
+def mixnet_l(pretrained=False, **kwargs):
+    """MixNet Large: the `_gen_mixnet_m` generator with channel multiplier 1.3."""
+    # Training hint: drop_rate=0.25.
+    return _gen_mixnet_m('mixnet_l', pretrained=pretrained, channel_multiplier=1.3, **kwargs)
+def mixnet_xl(pretrained=False, **kwargs):
+    """MixNet Extra-Large: `_gen_mixnet_m` with channel multiplier 1.6 and depth multiplier 1.2.
+    Not a paper spec; experimental definition by RW with depth scaling.
+    """
+    # Training hint: drop_rate=0.25, drop_connect_rate=0.2.
+    return _gen_mixnet_m(
+        'mixnet_xl', pretrained=pretrained, channel_multiplier=1.6, depth_multiplier=1.2, **kwargs)
+def mixnet_xxl(pretrained=False, **kwargs):
+    """MixNet Double Extra Large: `_gen_mixnet_m` with channel multiplier 2.4 and depth multiplier 1.3.
+    Not a paper spec; experimental definition by RW with depth scaling.
+    """
+    # Training hint: drop_rate=0.3, drop_connect_rate=0.2.
+    return _gen_mixnet_m(
+        'mixnet_xxl', pretrained=pretrained, channel_multiplier=2.4, depth_multiplier=1.3, **kwargs)
+def tf_mixnet_s(pretrained=False, **kwargs):
+    """Tensorflow-compatible MixNet Small."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_mixnet_s('tf_mixnet_s', pretrained=pretrained, channel_multiplier=1.0, **kwargs)
+def tf_mixnet_m(pretrained=False, **kwargs):
+    """Tensorflow-compatible MixNet Medium."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_mixnet_m('tf_mixnet_m', pretrained=pretrained, channel_multiplier=1.0, **kwargs)
+def tf_mixnet_l(pretrained=False, **kwargs):
+    """Tensorflow-compatible MixNet Large (`_gen_mixnet_m` with channel multiplier 1.3)."""
+    # Caller-supplied bn_eps / pad_type are overwritten.
+    kwargs.update(bn_eps=BN_EPS_TF_DEFAULT, pad_type='same')
+    return _gen_mixnet_m('tf_mixnet_l', pretrained=pretrained, channel_multiplier=1.3, **kwargs)

geffnet/helpers.py ADDED Viewed

	@@ -0,0 +1,71 @@

+""" Checkpoint loading / state_dict helpers
+Copyright 2020 Ross Wightman
+"""
+import torch
+import os
+from collections import OrderedDict
+try:
+    from torch.hub import load_state_dict_from_url
+except ImportError:
+    from torch.utils.model_zoo import load_url as load_state_dict_from_url
+def load_checkpoint(model, checkpoint_path):
+    """Load weights from a checkpoint file on disk into `model`.
+    The file may hold either a bare state dict or a dict with a 'state_dict' entry. In the
+    latter case a leading 'module.' on each key is stripped before loading.
+    Args:
+        model: module whose `load_state_dict` receives the weights (strict loading).
+        checkpoint_path: path of the checkpoint file.
+    Raises:
+        FileNotFoundError: if `checkpoint_path` is empty or is not an existing file.
+    """
+    if not checkpoint_path or not os.path.isfile(checkpoint_path):
+        print("=> Error: No checkpoint found at '{}'".format(checkpoint_path))
+        raise FileNotFoundError("No checkpoint found at '{}'".format(checkpoint_path))
+    print("=> Loading checkpoint '{}'".format(checkpoint_path))
+    # NOTE(security): torch.load unpickles the file; only load checkpoints from trusted sources.
+    # map_location='cpu' lets checkpoints saved from GPU tensors load on CPU-only hosts;
+    # load_state_dict copies the values into the model's existing parameters afterwards.
+    checkpoint = torch.load(checkpoint_path, map_location='cpu')
+    if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
+        # Match the full 'module.' prefix (with the dot) so that keys which merely begin
+        # with the word 'module' (e.g. 'module_head.weight') are left intact.
+        prefix = 'module.'
+        state_dict = OrderedDict(
+            (k[len(prefix):] if k.startswith(prefix) else k, v)
+            for k, v in checkpoint['state_dict'].items())
+    else:
+        state_dict = checkpoint
+    model.load_state_dict(state_dict)
+    print("=> Loaded checkpoint '{}'".format(checkpoint_path))
+def load_pretrained(model, url, filter_fn=None, strict=True):
+    """Fetch a pretrained state dict from `url` and load it into `model`.
+    The model is expected to expose `conv_stem` and `classifier` attributes with a `.weight`.
+    A stem whose input-channel count differs from the checkpoint is either summed down to one
+    channel (when the model takes a single channel) or dropped; a classifier with a different
+    class count is dropped. Dropping anything switches loading to non-strict.
+    Args:
+        model: module to receive the weights.
+        url: location of the weights; an empty value leaves the model untouched.
+        filter_fn: optional callable applied to the state dict just before loading.
+        strict: forwarded to `load_state_dict` unless entries were discarded.
+    """
+    if not url:
+        print("=> Warning: Pretrained model URL is empty, using random initialization.")
+        return
+    weights = load_state_dict_from_url(url, progress=False, map_location='cpu')
+    stem_name, head_name = 'conv_stem', 'classifier'
+    model_in_chans = getattr(model, stem_name).weight.shape[1]
+    model_num_classes = getattr(model, head_name).weight.shape[0]
+    stem_key = stem_name + '.weight'
+    ckpt_in_chans = weights[stem_key].shape[1]
+    stem_mismatch = model_in_chans != ckpt_in_chans
+    if stem_mismatch and model_in_chans == 1:
+        print('=> Converting pretrained input conv {} from {} to 1 channel'.format(
+            stem_key, ckpt_in_chans))
+        # Collapse the pretrained input channels by summing over dim 1.
+        weights[stem_key] = weights[stem_key].sum(dim=1, keepdim=True)
+    elif stem_mismatch:
+        print('=> Discarding pretrained input conv {} since input channel count != {}'.format(
+            stem_key, ckpt_in_chans))
+        del weights[stem_key]
+        strict = False
+    head_weight_key = head_name + '.weight'
+    ckpt_num_classes = weights[head_weight_key].shape[0]
+    if model_num_classes != ckpt_num_classes:
+        print('=> Discarding pretrained classifier since num_classes != {}'.format(ckpt_num_classes))
+        for key in (head_weight_key, head_name + '.bias'):
+            del weights[key]
+        strict = False
+    if filter_fn is not None:
+        weights = filter_fn(weights)
+    model.load_state_dict(weights, strict=strict)

geffnet/mobilenetv3.py ADDED Viewed

	@@ -0,0 +1,364 @@

+""" MobileNet-V3
+A PyTorch impl of MobileNet-V3, compatible with TF weights from official impl.
+Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+import torch.nn as nn
+import torch.nn.functional as F
+from .activations import get_act_fn, get_act_layer, HardSwish
+from .config import layer_config_kwargs
+from .conv2d_layers import select_conv2d
+from .helpers import load_pretrained
+from .efficientnet_builder import *
+# Public factory names exported by `from .mobilenetv3 import *`.
+__all__ = ['mobilenetv3_rw', 'mobilenetv3_large_075', 'mobilenetv3_large_100', 'mobilenetv3_large_minimal_100',
+           'mobilenetv3_small_075', 'mobilenetv3_small_100', 'mobilenetv3_small_minimal_100',
+           'tf_mobilenetv3_large_075', 'tf_mobilenetv3_large_100', 'tf_mobilenetv3_large_minimal_100',
+           'tf_mobilenetv3_small_075', 'tf_mobilenetv3_small_100', 'tf_mobilenetv3_small_minimal_100']
+# Variant name -> URL of its pretrained weights. `_create_model` indexes this dict by variant
+# and skips weight loading when the entry is falsy, so None marks a variant with no URL here.
+model_urls = {
+    'mobilenetv3_rw':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_100-35495452.pth',
+    'mobilenetv3_large_075': None,
+    'mobilenetv3_large_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_large_100_ra-f55367f5.pth',
+    'mobilenetv3_large_minimal_100': None,
+    'mobilenetv3_small_075': None,
+    'mobilenetv3_small_100': None,
+    'mobilenetv3_small_minimal_100': None,
+    'tf_mobilenetv3_large_075':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_075-150ee8b0.pth',
+    'tf_mobilenetv3_large_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth',
+    'tf_mobilenetv3_large_minimal_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_minimal_100-8596ae28.pth',
+    'tf_mobilenetv3_small_075':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_075-da427f52.pth',
+    'tf_mobilenetv3_small_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_100-37f49e2b.pth',
+    'tf_mobilenetv3_small_minimal_100':
+        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth',
+}
+class MobileNetV3(nn.Module):
+    """ MobileNet-V3
+    This model utilizes the MobileNet-v3 specific 'efficient head', where global pooling is done before the
+    head convolution without a final batch-norm layer before the classifier.
+    Paper: https://arxiv.org/abs/1905.02244
+    """
+    def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=16, num_features=1280, head_bias=True,
+                 channel_multiplier=1.0, pad_type='', act_layer=HardSwish, drop_rate=0., drop_connect_rate=0.,
+                 se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, weight_init='goog'):
+        """Assemble stem, block stack, efficient head and classifier.
+        Args:
+            block_args: block definitions handed to `EfficientNetBuilder`.
+            num_classes: output size of the final linear classifier.
+            in_chans: input channel count of the stem convolution.
+            stem_size: stem width before `round_channels` scaling.
+            num_features: width of the head convolution / classifier input.
+            head_bias: whether the head convolution has a bias term.
+            channel_multiplier: width multiplier for the stem and the builder.
+            pad_type: padding spec forwarded to `select_conv2d` and the builder.
+            act_layer: activation class, instantiated with `inplace=True`.
+            drop_rate: dropout probability applied before the classifier.
+            drop_connect_rate: forwarded to the builder.
+            se_kwargs: forwarded to the builder.
+            norm_layer: normalization class used for the stem and forwarded to the builder.
+            norm_kwargs: keyword arguments for `norm_layer`; None means no extra arguments.
+            weight_init: 'goog' selects `initialize_weight_goog`, anything else the default init.
+        """
+        super(MobileNetV3, self).__init__()
+        self.drop_rate = drop_rate
+        # The signature defaults norm_kwargs to None; `**None` would raise a TypeError.
+        stem_norm_kwargs = norm_kwargs or {}
+        stem_size = round_channels(stem_size, channel_multiplier)
+        self.conv_stem = select_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type)
+        # Use the configured norm layer for the stem too, matching what the builder receives.
+        self.bn1 = norm_layer(stem_size, **stem_norm_kwargs)
+        self.act1 = act_layer(inplace=True)
+        in_chs = stem_size
+        builder = EfficientNetBuilder(
+            channel_multiplier, pad_type=pad_type, act_layer=act_layer, se_kwargs=se_kwargs,
+            norm_layer=norm_layer, norm_kwargs=norm_kwargs, drop_connect_rate=drop_connect_rate)
+        self.blocks = nn.Sequential(*builder(in_chs, block_args))
+        in_chs = builder.in_chs
+        self.global_pool = nn.AdaptiveAvgPool2d(1)
+        self.conv_head = select_conv2d(in_chs, num_features, 1, padding=pad_type, bias=head_bias)
+        self.act2 = act_layer(inplace=True)
+        self.classifier = nn.Linear(num_features, num_classes)
+        # Pick the init scheme once instead of re-testing it for every module.
+        init_fn = initialize_weight_goog if weight_init == 'goog' else initialize_weight_default
+        for m in self.modules():
+            init_fn(m)
+    def as_sequential(self):
+        """Return the same layers as a flat `nn.Sequential`, with explicit Flatten and Dropout modules."""
+        layers = [self.conv_stem, self.bn1, self.act1]
+        layers.extend(self.blocks)
+        layers.extend([
+            self.global_pool, self.conv_head, self.act2,
+            nn.Flatten(), nn.Dropout(self.drop_rate), self.classifier])
+        return nn.Sequential(*layers)
+    def features(self, x):
+        """Run stem, blocks, global pooling and the head convolution; the classifier is not applied."""
+        x = self.conv_stem(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        x = self.blocks(x)
+        x = self.global_pool(x)
+        x = self.conv_head(x)
+        x = self.act2(x)
+        return x
+    def forward(self, x):
+        """Compute classifier outputs; dropout is applied only when `drop_rate` > 0."""
+        x = self.features(x)
+        x = x.flatten(1)
+        if self.drop_rate > 0.:
+            x = F.dropout(x, p=self.drop_rate, training=self.training)
+        return self.classifier(x)
+def _create_model(model_kwargs, variant, pretrained=False):
+    """Instantiate `MobileNetV3` from `model_kwargs`, optionally loading weights for `variant`.
+    The 'as_sequential' entry is popped from `model_kwargs` (the dict is mutated); when truthy
+    the model's `as_sequential()` form is returned instead of the module itself.
+    """
+    want_sequential = model_kwargs.pop('as_sequential', False)
+    net = MobileNetV3(**model_kwargs)
+    # The URL table is consulted only when pretrained weights were requested.
+    weights_url = model_urls[variant] if pretrained else None
+    if weights_url:
+        load_pretrained(net, weights_url)
+    return net.as_sequential() if want_sequential else net
+def _gen_mobilenet_v3_rw(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates a MobileNet-V3 model (RW variant).
+    Paper: https://arxiv.org/abs/1905.02244
+    This was my first attempt at reproducing the MobileNet-V3 from paper alone. It came close to the
+    eventual Tensorflow reference impl but has a few differences:
+    1. This model has no bias on the head convolution
+    2. This model forces no residual (noskip) on the first DWS block, this is different than MnasNet
+    3. This model always uses ReLU for the SE activation layer, other models in the family inherit their act layer
+       from their parent block
+    4. This model does not enforce divisible by 8 limitation on the SE reduction channel count
+    Overall the changes are fairly minor and result in a very small parameter count difference and no
+    top-1/5 (the original note is cut off here; presumably "accuracy difference" - TODO confirm)
+    Args:
+      variant: key used by `_create_model` to look up pretrained weights in `model_urls`.
+      channel_multiplier: multiplier to number of channels per layer.
+      pretrained: when true, `_create_model` loads the weights registered for `variant`, if any.
+      **kwargs: remaining options; whatever is left is forwarded to the `MobileNetV3` constructor.
+    Returns:
+      The model produced by `_create_model`.
+    """
+    # One inner list per stage; each string is a block spec decoded by `decode_arch_def`.
+    arch_def = [
+        # stage 0, 112x112 in
+        ['ds_r1_k3_s1_e1_c16_nre_noskip'],  # relu
+        # stage 1, 112x112 in
+        ['ir_r1_k3_s2_e4_c24_nre', 'ir_r1_k3_s1_e3_c24_nre'],  # relu
+        # stage 2, 56x56 in
+        ['ir_r3_k5_s2_e3_c40_se0.25_nre'],  # relu
+        # stage 3, 28x28 in
+        ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'],  # hard-swish
+        # stage 4, 14x14in
+        ['ir_r2_k3_s1_e6_c112_se0.25'],  # hard-swish
+        # stage 5, 14x14in
+        ['ir_r3_k5_s2_e6_c160_se0.25'],  # hard-swish
+        # stage 6, 7x7 in
+        ['cn_r1_k1_s1_c960'],  # hard-swish
+    ]
+    # NOTE(review): layer_config_kwargs presumably applies layer-config flags taken from kwargs
+    # for the duration of model construction - confirm against config.py.
+    with layer_config_kwargs(kwargs):
+        # The resolve_* helpers receive kwargs before `**kwargs` is expanded at the end of the
+        # call; presumably they remove the keys they consume - TODO confirm. Keep this order.
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            head_bias=False,  # one of my mistakes
+            channel_multiplier=channel_multiplier,
+            act_layer=resolve_act_layer(kwargs, 'hard_swish'),
+            se_kwargs=dict(gate_fn=get_act_fn('hard_sigmoid'), reduce_mid=True),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs,
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Creates a MobileNet-V3 large/small/minimal models.
+    Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v3.py
+    Paper: https://arxiv.org/abs/1905.02244
+    The architecture is chosen by substring tests on `variant`:
+      * 'small' in variant   -> num_features=1024 and the small block table, otherwise
+        num_features=1280 and the large block table.
+      * 'minimal' in variant -> default activation 'relu' and a table whose entries contain no
+        `se` or `nre` options and only `k3` kernels; otherwise the default activation is 'hard_swish'.
+    Args:
+      variant: model name; drives the selection above and is passed to `_create_model`, which
+        uses it as the key into `model_urls`.
+      channel_multiplier: multiplier to number of channels per layer.
+      pretrained: passed to `_create_model`; when truthy the weights registered for `variant` are loaded.
+      **kwargs: forwarded into the model constructor arguments. The same dict is also handed to
+        `layer_config_kwargs`, `resolve_act_layer` and `resolve_bn_args`, which presumably consume
+        their own keys -- confirm against those helpers.
+    Returns:
+      The model produced by `_create_model`.
+    """
+    if 'small' in variant:
+        num_features = 1024
+        if 'minimal' in variant:
+            act_layer = 'relu'
+            arch_def = [
+                # stage 0, 112x112 in
+                ['ds_r1_k3_s2_e1_c16'],
+                # stage 1, 56x56 in
+                ['ir_r1_k3_s2_e4.5_c24', 'ir_r1_k3_s1_e3.67_c24'],
+                # stage 2, 28x28 in
+                ['ir_r1_k3_s2_e4_c40', 'ir_r2_k3_s1_e6_c40'],
+                # stage 3, 14x14 in
+                ['ir_r2_k3_s1_e3_c48'],
+                # stage 4, 14x14in
+                ['ir_r3_k3_s2_e6_c96'],
+                # stage 5, 7x7 in
+                ['cn_r1_k1_s1_c576'],
+            ]
+        else:
+            act_layer = 'hard_swish'
+            arch_def = [
+                # stage 0, 112x112 in
+                ['ds_r1_k3_s2_e1_c16_se0.25_nre'],  # relu
+                # stage 1, 56x56 in
+                ['ir_r1_k3_s2_e4.5_c24_nre', 'ir_r1_k3_s1_e3.67_c24_nre'],  # relu
+                # stage 2, 28x28 in
+                ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r2_k5_s1_e6_c40_se0.25'],  # hard-swish
+                # stage 3, 14x14 in
+                ['ir_r2_k5_s1_e3_c48_se0.25'],  # hard-swish
+                # stage 4, 14x14in
+                ['ir_r3_k5_s2_e6_c96_se0.25'],  # hard-swish
+                # stage 5, 7x7 in
+                ['cn_r1_k1_s1_c576'],  # hard-swish
+            ]
+    else:
+        num_features = 1280
+        if 'minimal' in variant:
+            act_layer = 'relu'
+            arch_def = [
+                # stage 0, 112x112 in
+                ['ds_r1_k3_s1_e1_c16'],
+                # stage 1, 112x112 in
+                ['ir_r1_k3_s2_e4_c24', 'ir_r1_k3_s1_e3_c24'],
+                # stage 2, 56x56 in
+                ['ir_r3_k3_s2_e3_c40'],
+                # stage 3, 28x28 in
+                ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'],
+                # stage 4, 14x14in
+                ['ir_r2_k3_s1_e6_c112'],
+                # stage 5, 14x14in
+                ['ir_r3_k3_s2_e6_c160'],
+                # stage 6, 7x7 in
+                ['cn_r1_k1_s1_c960'],
+            ]
+        else:
+            act_layer = 'hard_swish'
+            arch_def = [
+                # stage 0, 112x112 in
+                ['ds_r1_k3_s1_e1_c16_nre'],  # relu
+                # stage 1, 112x112 in
+                ['ir_r1_k3_s2_e4_c24_nre', 'ir_r1_k3_s1_e3_c24_nre'],  # relu
+                # stage 2, 56x56 in
+                ['ir_r3_k5_s2_e3_c40_se0.25_nre'],  # relu
+                # stage 3, 28x28 in
+                ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'],  # hard-swish
+                # stage 4, 14x14in
+                ['ir_r2_k3_s1_e6_c112_se0.25'],  # hard-swish
+                # stage 5, 14x14in
+                ['ir_r3_k5_s2_e6_c160_se0.25'],  # hard-swish
+                # stage 6, 7x7 in
+                ['cn_r1_k1_s1_c960'],  # hard-swish
+            ]
+    # The model is built inside the `layer_config_kwargs` context manager
+    # (presumably applies layer-config options taken from kwargs -- TODO confirm).
+    with layer_config_kwargs(kwargs):
+        model_kwargs = dict(
+            block_args=decode_arch_def(arch_def),
+            num_features=num_features,
+            stem_size=16,
+            channel_multiplier=channel_multiplier,
+            # `act_layer` chosen above is only the fallback handed to resolve_act_layer.
+            act_layer=resolve_act_layer(kwargs, act_layer),
+            se_kwargs=dict(
+                act_layer=get_act_layer('relu'), gate_fn=get_act_fn('hard_sigmoid'), reduce_mid=True, divisor=8),
+            norm_kwargs=resolve_bn_args(kwargs),
+            **kwargs,
+        )
+        model = _create_model(model_kwargs, variant, pretrained)
+    return model
+def mobilenetv3_rw(pretrained=False, **kwargs):
+    """ MobileNet-V3 RW
+    Attn: See note in gen function for this variant.
+    """
+    # NOTE for train set drop_rate=0.2
+    if pretrained:
+        # pretrained model trained with non-default BN epsilon
+        kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    model = _gen_mobilenet_v3_rw('mobilenetv3_rw', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv3_large_075(pretrained=False, **kwargs):
+    """ MobileNet V3 Large 0.75"""
+    # NOTE for train set drop_rate=0.2
+    model = _gen_mobilenet_v3('mobilenetv3_large_075', 0.75, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv3_large_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Large 1.0 """
+    # NOTE for train set drop_rate=0.2
+    model = _gen_mobilenet_v3('mobilenetv3_large_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv3_large_minimal_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Large (Minimalistic) 1.0 """
+    # NOTE for train set drop_rate=0.2
+    model = _gen_mobilenet_v3('mobilenetv3_large_minimal_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv3_small_075(pretrained=False, **kwargs):
+    """ MobileNet V3 Small 0.75 """
+    model = _gen_mobilenet_v3('mobilenetv3_small_075', 0.75, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv3_small_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Small 1.0 """
+    model = _gen_mobilenet_v3('mobilenetv3_small_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def mobilenetv3_small_minimal_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Small (Minimalistic) 1.0 """
+    model = _gen_mobilenet_v3('mobilenetv3_small_minimal_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def tf_mobilenetv3_large_075(pretrained=False, **kwargs):
+    """ MobileNet V3 Large 0.75. Tensorflow compat variant. """
+    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    kwargs['pad_type'] = 'same'
+    model = _gen_mobilenet_v3('tf_mobilenetv3_large_075', 0.75, pretrained=pretrained, **kwargs)
+    return model
+def tf_mobilenetv3_large_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Large 1.0. Tensorflow compat variant. """
+    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    kwargs['pad_type'] = 'same'
+    model = _gen_mobilenet_v3('tf_mobilenetv3_large_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def tf_mobilenetv3_large_minimal_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Large Minimalistic 1.0. Tensorflow compat variant. """
+    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    kwargs['pad_type'] = 'same'
+    model = _gen_mobilenet_v3('tf_mobilenetv3_large_minimal_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def tf_mobilenetv3_small_075(pretrained=False, **kwargs):
+    """ MobileNet V3 Small 0.75. Tensorflow compat variant. """
+    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    kwargs['pad_type'] = 'same'
+    model = _gen_mobilenet_v3('tf_mobilenetv3_small_075', 0.75, pretrained=pretrained, **kwargs)
+    return model
+def tf_mobilenetv3_small_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Small 1.0. Tensorflow compat variant."""
+    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    kwargs['pad_type'] = 'same'
+    model = _gen_mobilenet_v3('tf_mobilenetv3_small_100', 1.0, pretrained=pretrained, **kwargs)
+    return model
+def tf_mobilenetv3_small_minimal_100(pretrained=False, **kwargs):
+    """ MobileNet V3 Small Minimalistic 1.0. Tensorflow compat variant. """
+    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
+    kwargs['pad_type'] = 'same'
+    model = _gen_mobilenet_v3('tf_mobilenetv3_small_minimal_100', 1.0, pretrained=pretrained, **kwargs)
+    return model

geffnet/model_factory.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from .config import set_layer_config
+from .helpers import load_checkpoint
+from .gen_efficientnet import *
+from .mobilenetv3 import *
+def create_model(
+        model_name='mnasnet_100',
+        pretrained=None,
+        num_classes=1000,
+        in_chans=3,
+        checkpoint_path='',
+        **kwargs):
+    model_kwargs = dict(num_classes=num_classes, in_chans=in_chans, pretrained=pretrained, **kwargs)
+    if model_name in globals():
+        create_fn = globals()[model_name]
+        model = create_fn(**model_kwargs)
+    else:
+        raise RuntimeError('Unknown model (%s)' % model_name)
+    if checkpoint_path and not pretrained:
+        load_checkpoint(model, checkpoint_path)
+    return model

geffnet/version.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ __version__ = '1.0.2'

requirements.txt CHANGED Viewed

@@ -109,12 +109,13 @@ sympy==1.12.1
 tokenizers==0.15.2
 tomlkit==0.12.0
 toolz==0.12.1
-torch==2.0.1
-torchvision==v0.15.2
 tqdm==4.66.4
 transformers==4.36.1
 trimesh==4.0.5
-triton==2.0.0
 typer==0.12.3
 typing-inspect==0.9.0
 typing_extensions==4.11.0
@@ -126,10 +127,8 @@ uvloop==0.19.0
 watchfiles==0.22.0
 websockets==11.0.3
 wrapt==1.16.0
-xformers==0.0.20
 xxhash==3.4.1
 yarl==1.9.4
 zipp==3.19.1
 einops==0.7.0
-opencv-python-headless==4.8.1.78
-geffnet==1.0.2

 tokenizers==0.15.2
 tomlkit==0.12.0
 toolz==0.12.1
+torch==2.2.0
+torchvision==0.17.0
+xformers==0.0.24
+triton==2.2.0
 tqdm==4.66.4
 transformers==4.36.1
 trimesh==4.0.5
 typer==0.12.3
 typing-inspect==0.9.0
 typing_extensions==4.11.0
 watchfiles==0.22.0
 websockets==11.0.3
 wrapt==1.16.0
 xxhash==3.4.1
 yarl==1.9.4
 zipp==3.19.1
 einops==0.7.0
+opencv-python-headless==4.8.1.78