diff --git a/docs/conf.py b/docs/conf.py index 82583c6b..4cc70a6b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -100,7 +100,6 @@ def get_version(): "timm", "cv2", "PIL", - "pretrainedmodels", "torchvision", "segmentation_models_pytorch.encoders", "segmentation_models_pytorch.utils", diff --git a/licenses/LICENSES.md b/licenses/LICENSES.md index 06f36241..e51ad8d0 100644 --- a/licenses/LICENSES.md +++ b/licenses/LICENSES.md @@ -13,14 +13,18 @@ The majority of the code is licensed under the [MIT License](LICENSE). However, * [segmentation_models_pytorch/encoders/mix_transformer.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/mix_transformer.py) * [LICENSE_nvidia](LICENSE_nvidia.md) - - Apple License * Applies to the MobileOne encoder * [segmentation_models_pytorch/encoders/mobileone.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/mobileone.py) * [LICENSE_apple](LICENSE_apple.md) - BSD 3-Clause License - * Applies to the DeepLabV3 decoder + * Applies to several encoders and the DeepLabV3 decoder + * [segmentation_models_pytorch/encoders/_dpn.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/_dpn.py) + * [segmentation_models_pytorch/encoders/_inceptionresnetv2.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/_inceptionresnetv2.py) + * [segmentation_models_pytorch/encoders/_inceptionv4.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/_inceptionv4.py) + * [segmentation_models_pytorch/encoders/_senet.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/_senet.py) + * [segmentation_models_pytorch/encoders/_xception.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/_xception.py) * 
[segmentation_models_pytorch/decoders/deeplabv3/decoder.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/decoders/deeplabv3/decoder.py) - Apache-2.0 License diff --git a/pyproject.toml b/pyproject.toml index 492f7ef9..f3e55a96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,9 +20,7 @@ dependencies = [ 'huggingface-hub>=0.24', 'numpy>=1.19.3', 'pillow>=8', - 'pretrainedmodels>=0.7.1', 'safetensors>=0.3.1', - 'six>=1.5', 'timm>=0.9', 'torch>=1.8', 'torchvision>=0.9', diff --git a/requirements/minimum.old b/requirements/minimum.old index 1adb97f5..678f83f4 100644 --- a/requirements/minimum.old +++ b/requirements/minimum.old @@ -1,9 +1,7 @@ huggingface-hub==0.24.0 numpy==1.19.3 pillow==8.0.0 -pretrainedmodels==0.7.1 safetensors==0.3.1 -six==1.5.0 timm==0.9.0 torch==1.9.0 torchvision==0.10.0 diff --git a/requirements/required.txt b/requirements/required.txt index 6864e1f6..cf3db498 100644 --- a/requirements/required.txt +++ b/requirements/required.txt @@ -1,9 +1,7 @@ huggingface_hub==0.27.1 numpy==2.2.1 pillow==11.1.0 -pretrainedmodels==0.7.4 safetensors==0.5.2 -six==1.17.0 timm==1.0.13 torch==2.5.1 torchvision==0.20.1 diff --git a/segmentation_models_pytorch/__init__.py b/segmentation_models_pytorch/__init__.py index f1807836..8a1e17fe 100644 --- a/segmentation_models_pytorch/__init__.py +++ b/segmentation_models_pytorch/__init__.py @@ -1,5 +1,3 @@ -import warnings - from . import datasets from . import encoders from . 
import decoders @@ -24,12 +22,6 @@ from typing import Optional as _Optional import torch as _torch -# Suppress the specific SyntaxWarning for `pretrainedmodels` -warnings.filterwarnings("ignore", message="is with a literal", category=SyntaxWarning) -warnings.filterwarnings( - "ignore", message=r'"is" with \'str\' literal.*', category=SyntaxWarning -) # for python >= 3.12 - _MODEL_ARCHITECTURES = [ Unet, UnetPlusPlus, diff --git a/segmentation_models_pytorch/encoders/_dpn.py b/segmentation_models_pytorch/encoders/_dpn.py new file mode 100644 index 00000000..5ab965ca --- /dev/null +++ b/segmentation_models_pytorch/encoders/_dpn.py @@ -0,0 +1,461 @@ +""" PyTorch implementation of DualPathNetworks +Ported to PyTorch by [Ross Wightman](https://github.com/rwightman/pytorch-dpn-pretrained) + +Based on original MXNet implementation https://github.com/cypw/DPNs with +many ideas from another PyTorch implementation https://github.com/oyam/pytorch-DPNs. + +This implementation is compatible with the pretrained weights +from cypw's MXNet implementation. 
+""" +import os +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo +from collections import OrderedDict + +__all__ = ['DPN', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107'] + +pretrained_settings = { + 'dpn68': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn68-4af7d88d2.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [124 / 255, 117 / 255, 104 / 255], + 'std': [1 / (.0167 * 255)] * 3, + 'num_classes': 1000 + } + }, + 'dpn68b': { + 'imagenet+5k': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn68b_extra-363ab9c19.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [124 / 255, 117 / 255, 104 / 255], + 'std': [1 / (.0167 * 255)] * 3, + 'num_classes': 1000 + } + }, + 'dpn92': { + # 'imagenet': { + # 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn68-66bebafa7.pth', + # 'input_space': 'RGB', + # 'input_size': [3, 224, 224], + # 'input_range': [0, 1], + # 'mean': [124 / 255, 117 / 255, 104 / 255], + # 'std': [1 / (.0167 * 255)] * 3, + # 'num_classes': 1000 + # }, + 'imagenet+5k': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn92_extra-fda993c95.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [124 / 255, 117 / 255, 104 / 255], + 'std': [1 / (.0167 * 255)] * 3, + 'num_classes': 1000 + } + }, + 'dpn98': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn98-722954780.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [124 / 255, 117 / 255, 104 / 255], + 'std': [1 / (.0167 * 255)] * 3, + 'num_classes': 1000 + } + }, + 'dpn131': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn131-7af84be88.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [124 / 255, 117 / 255, 104 / 255], 
+ 'std': [1 / (.0167 * 255)] * 3, + 'num_classes': 1000 + } + }, + 'dpn107': { + 'imagenet+5k': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn107_extra-b7f9f4cc9.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [124 / 255, 117 / 255, 104 / 255], + 'std': [1 / (.0167 * 255)] * 3, + 'num_classes': 1000 + } + } +} + +def dpn68(num_classes=1000, pretrained='imagenet'): + model = DPN( + small=True, num_init_features=10, k_r=128, groups=32, + k_sec=(3, 4, 12, 3), inc_sec=(16, 32, 32, 64), + num_classes=num_classes, test_time_pool=True) + if pretrained: + settings = pretrained_settings['dpn68'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + model.load_state_dict(model_zoo.load_url(settings['url'])) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + return model + +def dpn68b(num_classes=1000, pretrained='imagenet+5k'): + model = DPN( + small=True, num_init_features=10, k_r=128, groups=32, + b=True, k_sec=(3, 4, 12, 3), inc_sec=(16, 32, 32, 64), + num_classes=num_classes, test_time_pool=True) + if pretrained: + settings = pretrained_settings['dpn68b'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + model.load_state_dict(model_zoo.load_url(settings['url'])) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + return model + +def dpn92(num_classes=1000, pretrained='imagenet+5k'): + model = DPN( + num_init_features=64, k_r=96, groups=32, + k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128), + num_classes=num_classes, 
test_time_pool=True) + if pretrained: + settings = pretrained_settings['dpn92'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + model.load_state_dict(model_zoo.load_url(settings['url'])) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + return model + +def dpn98(num_classes=1000, pretrained='imagenet'): + model = DPN( + num_init_features=96, k_r=160, groups=40, + k_sec=(3, 6, 20, 3), inc_sec=(16, 32, 32, 128), + num_classes=num_classes, test_time_pool=True) + if pretrained: + settings = pretrained_settings['dpn98'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + model.load_state_dict(model_zoo.load_url(settings['url'])) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + return model + +def dpn131(num_classes=1000, pretrained='imagenet'): + model = DPN( + num_init_features=128, k_r=160, groups=40, + k_sec=(4, 8, 28, 3), inc_sec=(16, 32, 32, 128), + num_classes=num_classes, test_time_pool=True) + if pretrained: + settings = pretrained_settings['dpn131'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + model.load_state_dict(model_zoo.load_url(settings['url'])) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + return model + +def dpn107(num_classes=1000, pretrained='imagenet+5k'): + model = DPN( + num_init_features=128, 
k_r=200, groups=50, + k_sec=(4, 8, 20, 3), inc_sec=(20, 64, 64, 128), + num_classes=num_classes, test_time_pool=True) + if pretrained: + settings = pretrained_settings['dpn107'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + model.load_state_dict(model_zoo.load_url(settings['url'])) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + return model + + +class CatBnAct(nn.Module): + def __init__(self, in_chs, activation_fn=nn.ReLU(inplace=True)): + super(CatBnAct, self).__init__() + self.bn = nn.BatchNorm2d(in_chs, eps=0.001) + self.act = activation_fn + + def forward(self, x): + x = torch.cat(x, dim=1) if isinstance(x, tuple) else x + return self.act(self.bn(x)) + + +class BnActConv2d(nn.Module): + def __init__(self, in_chs, out_chs, kernel_size, stride, + padding=0, groups=1, activation_fn=nn.ReLU(inplace=True)): + super(BnActConv2d, self).__init__() + self.bn = nn.BatchNorm2d(in_chs, eps=0.001) + self.act = activation_fn + self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, padding, groups=groups, bias=False) + + def forward(self, x): + return self.conv(self.act(self.bn(x))) + + +class InputBlock(nn.Module): + def __init__(self, num_init_features, kernel_size=7, + padding=3, activation_fn=nn.ReLU(inplace=True)): + super(InputBlock, self).__init__() + self.conv = nn.Conv2d( + 3, num_init_features, kernel_size=kernel_size, stride=2, padding=padding, bias=False) + self.bn = nn.BatchNorm2d(num_init_features, eps=0.001) + self.act = activation_fn + self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.act(x) + x = self.pool(x) + return x + + +class DualPathBlock(nn.Module): + def __init__( + self, in_chs, num_1x1_a, num_3x3_b, 
num_1x1_c, inc, groups, block_type='normal', b=False): + super(DualPathBlock, self).__init__() + self.num_1x1_c = num_1x1_c + self.inc = inc + self.b = b + if block_type == 'proj': + self.key_stride = 1 + self.has_proj = True + elif block_type == 'down': + self.key_stride = 2 + self.has_proj = True + else: + assert block_type == 'normal' + self.key_stride = 1 + self.has_proj = False + + if self.has_proj: + # Using different member names here to allow easier parameter key matching for conversion + if self.key_stride == 2: + self.c1x1_w_s2 = BnActConv2d( + in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=2) + else: + self.c1x1_w_s1 = BnActConv2d( + in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=1) + self.c1x1_a = BnActConv2d(in_chs=in_chs, out_chs=num_1x1_a, kernel_size=1, stride=1) + self.c3x3_b = BnActConv2d( + in_chs=num_1x1_a, out_chs=num_3x3_b, kernel_size=3, + stride=self.key_stride, padding=1, groups=groups) + if b: + self.c1x1_c = CatBnAct(in_chs=num_3x3_b) + self.c1x1_c1 = nn.Conv2d(num_3x3_b, num_1x1_c, kernel_size=1, bias=False) + self.c1x1_c2 = nn.Conv2d(num_3x3_b, inc, kernel_size=1, bias=False) + else: + self.c1x1_c = BnActConv2d(in_chs=num_3x3_b, out_chs=num_1x1_c + inc, kernel_size=1, stride=1) + + def forward(self, x): + x_in = torch.cat(x, dim=1) if isinstance(x, tuple) else x + if self.has_proj: + if self.key_stride == 2: + x_s = self.c1x1_w_s2(x_in) + else: + x_s = self.c1x1_w_s1(x_in) + x_s1 = x_s[:, :self.num_1x1_c, :, :] + x_s2 = x_s[:, self.num_1x1_c:, :, :] + else: + x_s1 = x[0] + x_s2 = x[1] + x_in = self.c1x1_a(x_in) + x_in = self.c3x3_b(x_in) + if self.b: + x_in = self.c1x1_c(x_in) + out1 = self.c1x1_c1(x_in) + out2 = self.c1x1_c2(x_in) + else: + x_in = self.c1x1_c(x_in) + out1 = x_in[:, :self.num_1x1_c, :, :] + out2 = x_in[:, self.num_1x1_c:, :, :] + resid = x_s1 + out1 + dense = torch.cat([x_s2, out2], dim=1) + return resid, dense + + +class DPN(nn.Module): + def __init__(self, small=False, 
num_init_features=64, k_r=96, groups=32, + b=False, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128), + num_classes=1000, test_time_pool=False): + super(DPN, self).__init__() + self.test_time_pool = test_time_pool + self.b = b + bw_factor = 1 if small else 4 + + blocks = OrderedDict() + + # conv1 + if small: + blocks['conv1_1'] = InputBlock(num_init_features, kernel_size=3, padding=1) + else: + blocks['conv1_1'] = InputBlock(num_init_features, kernel_size=7, padding=3) + + # conv2 + bw = 64 * bw_factor + inc = inc_sec[0] + r = (k_r * bw) // (64 * bw_factor) + blocks['conv2_1'] = DualPathBlock(num_init_features, r, r, bw, inc, groups, 'proj', b) + in_chs = bw + 3 * inc + for i in range(2, k_sec[0] + 1): + blocks['conv2_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b) + in_chs += inc + + # conv3 + bw = 128 * bw_factor + inc = inc_sec[1] + r = (k_r * bw) // (64 * bw_factor) + blocks['conv3_1'] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'down', b) + in_chs = bw + 3 * inc + for i in range(2, k_sec[1] + 1): + blocks['conv3_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b) + in_chs += inc + + # conv4 + bw = 256 * bw_factor + inc = inc_sec[2] + r = (k_r * bw) // (64 * bw_factor) + blocks['conv4_1'] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'down', b) + in_chs = bw + 3 * inc + for i in range(2, k_sec[2] + 1): + blocks['conv4_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b) + in_chs += inc + + # conv5 + bw = 512 * bw_factor + inc = inc_sec[3] + r = (k_r * bw) // (64 * bw_factor) + blocks['conv5_1'] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'down', b) + in_chs = bw + 3 * inc + for i in range(2, k_sec[3] + 1): + blocks['conv5_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b) + in_chs += inc + blocks['conv5_bn_ac'] = CatBnAct(in_chs) + + self.features = nn.Sequential(blocks) + + # Using 1x1 conv for the FC layer to allow the extra pooling scheme + self.last_linear = 
nn.Conv2d(in_chs, num_classes, kernel_size=1, bias=True) + + def logits(self, features): + if not self.training and self.test_time_pool: + x = F.avg_pool2d(features, kernel_size=7, stride=1) + out = self.last_linear(x) + # The extra test time pool should be pooling an img_size//32 - 6 size patch + out = adaptive_avgmax_pool2d(out, pool_type='avgmax') + else: + x = adaptive_avgmax_pool2d(features, pool_type='avg') + out = self.last_linear(x) + return out.view(out.size(0), -1) + + def forward(self, input): + x = self.features(input) + x = self.logits(x) + return x + +""" PyTorch selectable adaptive pooling +Adaptive pooling with the ability to select the type of pooling from: + * 'avg' - Average pooling + * 'max' - Max pooling + * 'avgmax' - Sum of average and max pooling re-scaled by 0.5 + * 'avgmaxc' - Concatenation of average and max pooling along feature dim, doubles feature dim + +Both a functional and a nn.Module version of the pooling is provided. + +Author: Ross Wightman (rwightman) +""" + +def pooling_factor(pool_type='avg'): + return 2 if pool_type == 'avgmaxc' else 1 + + +def adaptive_avgmax_pool2d(x, pool_type='avg', padding=0, count_include_pad=False): + """Selectable global pooling function with dynamic input kernel size + """ + if pool_type == 'avgmaxc': + x = torch.cat([ + F.avg_pool2d( + x, kernel_size=(x.size(2), x.size(3)), padding=padding, count_include_pad=count_include_pad), + F.max_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=padding) + ], dim=1) + elif pool_type == 'avgmax': + x_avg = F.avg_pool2d( + x, kernel_size=(x.size(2), x.size(3)), padding=padding, count_include_pad=count_include_pad) + x_max = F.max_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=padding) + x = 0.5 * (x_avg + x_max) + elif pool_type == 'max': + x = F.max_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=padding) + else: + if pool_type != 'avg': + print('Invalid pool type %s specified. Defaulting to average pooling.' 
% pool_type) + x = F.avg_pool2d( + x, kernel_size=(x.size(2), x.size(3)), padding=padding, count_include_pad=count_include_pad) + return x + + +class AdaptiveAvgMaxPool2d(torch.nn.Module): + """Selectable global pooling layer with dynamic input kernel size + """ + def __init__(self, output_size=1, pool_type='avg'): + super(AdaptiveAvgMaxPool2d, self).__init__() + self.output_size = output_size + self.pool_type = pool_type + if pool_type == 'avgmaxc' or pool_type == 'avgmax': + self.pool = nn.ModuleList([nn.AdaptiveAvgPool2d(output_size), nn.AdaptiveMaxPool2d(output_size)]) + elif pool_type == 'max': + self.pool = nn.AdaptiveMaxPool2d(output_size) + else: + if pool_type != 'avg': + print('Invalid pool type %s specified. Defaulting to average pooling.' % pool_type) + self.pool = nn.AdaptiveAvgPool2d(output_size) + + def forward(self, x): + if self.pool_type == 'avgmaxc': + x = torch.cat([p(x) for p in self.pool], dim=1) + elif self.pool_type == 'avgmax': + x = 0.5 * torch.sum(torch.stack([p(x) for p in self.pool]), 0).squeeze(dim=0) + else: + x = self.pool(x) + return x + + def factor(self): + return pooling_factor(self.pool_type) + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + 'output_size=' + str(self.output_size) \ + + ', pool_type=' + self.pool_type + ')' diff --git a/segmentation_models_pytorch/encoders/_inceptionresnetv2.py b/segmentation_models_pytorch/encoders/_inceptionresnetv2.py new file mode 100644 index 00000000..8f55bb0b --- /dev/null +++ b/segmentation_models_pytorch/encoders/_inceptionresnetv2.py @@ -0,0 +1,380 @@ +from __future__ import print_function, division, absolute_import +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +import os +import sys + +__all__ = ['InceptionResNetV2', 'inceptionresnetv2'] + +pretrained_settings = { + 'inceptionresnetv2': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth', + 'input_space': 'RGB', + 'input_size': [3, 
299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1000 + }, + 'imagenet+background': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth', + 'input_space': 'RGB', + 'input_size': [3, 299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1001 + } + } +} + + +class BasicConv2d(nn.Module): + + def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_planes, out_planes, + kernel_size=kernel_size, stride=stride, + padding=padding, bias=False) # verify bias false + self.bn = nn.BatchNorm2d(out_planes, + eps=0.001, # value found in tensorflow + momentum=0.1, # default pytorch value + affine=True) + self.relu = nn.ReLU(inplace=False) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Mixed_5b(nn.Module): + + def __init__(self): + super(Mixed_5b, self).__init__() + + self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( + BasicConv2d(192, 48, kernel_size=1, stride=1), + BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2) + ) + + self.branch2 = nn.Sequential( + BasicConv2d(192, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1), + BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1) + ) + + self.branch3 = nn.Sequential( + nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), + BasicConv2d(192, 64, kernel_size=1, stride=1) + ) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + x3 = self.branch3(x) + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class Block35(nn.Module): + + def __init__(self, scale=1.0): + super(Block35, self).__init__() + + self.scale = scale + + self.branch0 = BasicConv2d(320, 32, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( 
+ BasicConv2d(320, 32, kernel_size=1, stride=1), + BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1) + ) + + self.branch2 = nn.Sequential( + BasicConv2d(320, 32, kernel_size=1, stride=1), + BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1), + BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1) + ) + + self.conv2d = nn.Conv2d(128, 320, kernel_size=1, stride=1) + self.relu = nn.ReLU(inplace=False) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + out = torch.cat((x0, x1, x2), 1) + out = self.conv2d(out) + out = out * self.scale + x + out = self.relu(out) + return out + + +class Mixed_6a(nn.Module): + + def __init__(self): + super(Mixed_6a, self).__init__() + + self.branch0 = BasicConv2d(320, 384, kernel_size=3, stride=2) + + self.branch1 = nn.Sequential( + BasicConv2d(320, 256, kernel_size=1, stride=1), + BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1), + BasicConv2d(256, 384, kernel_size=3, stride=2) + ) + + self.branch2 = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + out = torch.cat((x0, x1, x2), 1) + return out + + +class Block17(nn.Module): + + def __init__(self, scale=1.0): + super(Block17, self).__init__() + + self.scale = scale + + self.branch0 = BasicConv2d(1088, 192, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( + BasicConv2d(1088, 128, kernel_size=1, stride=1), + BasicConv2d(128, 160, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(160, 192, kernel_size=(7,1), stride=1, padding=(3,0)) + ) + + self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1) + self.relu = nn.ReLU(inplace=False) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + out = torch.cat((x0, x1), 1) + out = self.conv2d(out) + out = out * self.scale + x + out = self.relu(out) + return out + + +class Mixed_7a(nn.Module): + + def __init__(self): + super(Mixed_7a, self).__init__() + + self.branch0 = 
nn.Sequential( + BasicConv2d(1088, 256, kernel_size=1, stride=1), + BasicConv2d(256, 384, kernel_size=3, stride=2) + ) + + self.branch1 = nn.Sequential( + BasicConv2d(1088, 256, kernel_size=1, stride=1), + BasicConv2d(256, 288, kernel_size=3, stride=2) + ) + + self.branch2 = nn.Sequential( + BasicConv2d(1088, 256, kernel_size=1, stride=1), + BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1), + BasicConv2d(288, 320, kernel_size=3, stride=2) + ) + + self.branch3 = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + x3 = self.branch3(x) + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class Block8(nn.Module): + + def __init__(self, scale=1.0, noReLU=False): + super(Block8, self).__init__() + + self.scale = scale + self.noReLU = noReLU + + self.branch0 = BasicConv2d(2080, 192, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( + BasicConv2d(2080, 192, kernel_size=1, stride=1), + BasicConv2d(192, 224, kernel_size=(1,3), stride=1, padding=(0,1)), + BasicConv2d(224, 256, kernel_size=(3,1), stride=1, padding=(1,0)) + ) + + self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1) + if not self.noReLU: + self.relu = nn.ReLU(inplace=False) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + out = torch.cat((x0, x1), 1) + out = self.conv2d(out) + out = out * self.scale + x + if not self.noReLU: + out = self.relu(out) + return out + + +class InceptionResNetV2(nn.Module): + + def __init__(self, num_classes=1001): + super(InceptionResNetV2, self).__init__() + # Special attributes + self.input_space = None + self.input_size = (299, 299, 3) + self.mean = None + self.std = None + # Modules + self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2) + self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1) + self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1) + self.maxpool_3a = nn.MaxPool2d(3, stride=2) + self.conv2d_3b = 
BasicConv2d(64, 80, kernel_size=1, stride=1) + self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1) + self.maxpool_5a = nn.MaxPool2d(3, stride=2) + self.mixed_5b = Mixed_5b() + self.repeat = nn.Sequential( + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17), + Block35(scale=0.17) + ) + self.mixed_6a = Mixed_6a() + self.repeat_1 = nn.Sequential( + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10), + Block17(scale=0.10) + ) + self.mixed_7a = Mixed_7a() + self.repeat_2 = nn.Sequential( + Block8(scale=0.20), + Block8(scale=0.20), + Block8(scale=0.20), + Block8(scale=0.20), + Block8(scale=0.20), + Block8(scale=0.20), + Block8(scale=0.20), + Block8(scale=0.20), + Block8(scale=0.20) + ) + self.block8 = Block8(noReLU=True) + self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1) + self.avgpool_1a = nn.AvgPool2d(8, count_include_pad=False) + self.last_linear = nn.Linear(1536, num_classes) + + def features(self, input): + x = self.conv2d_1a(input) + x = self.conv2d_2a(x) + x = self.conv2d_2b(x) + x = self.maxpool_3a(x) + x = self.conv2d_3b(x) + x = self.conv2d_4a(x) + x = self.maxpool_5a(x) + x = self.mixed_5b(x) + x = self.repeat(x) + x = self.mixed_6a(x) + x = self.repeat_1(x) + x = self.mixed_7a(x) + x = self.repeat_2(x) + x = self.block8(x) + x = self.conv2d_7b(x) + return x + + def logits(self, features): + x = self.avgpool_1a(features) + x = x.view(x.size(0), -1) + x = self.last_linear(x) + return x + + def forward(self, 
input): + x = self.features(input) + x = self.logits(x) + return x + +def inceptionresnetv2(num_classes=1000, pretrained='imagenet'): + r"""InceptionResNetV2 model architecture from the + `"InceptionV4, Inception-ResNet..." `_ paper. + """ + if pretrained: + settings = pretrained_settings['inceptionresnetv2'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + # both 'imagenet'&'imagenet+background' are loaded from same parameters + model = InceptionResNetV2(num_classes=1001) + model.load_state_dict(model_zoo.load_url(settings['url'])) + + if pretrained == 'imagenet': + new_last_linear = nn.Linear(1536, 1000) + new_last_linear.weight.data = model.last_linear.weight.data[1:] + new_last_linear.bias.data = model.last_linear.bias.data[1:] + model.last_linear = new_last_linear + + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + + model.mean = settings['mean'] + model.std = settings['std'] + else: + model = InceptionResNetV2(num_classes=num_classes) + return model + +''' +TEST +Run this code with: +``` +cd $HOME/pretrained-models.pytorch +python -m pretrainedmodels.inceptionresnetv2 +``` +''' +if __name__ == '__main__': + + assert inceptionresnetv2(num_classes=10, pretrained=None) + print('success') + assert inceptionresnetv2(num_classes=1000, pretrained='imagenet') + print('success') + assert inceptionresnetv2(num_classes=1001, pretrained='imagenet+background') + print('success') + + # fail + assert inceptionresnetv2(num_classes=1001, pretrained='imagenet') \ No newline at end of file diff --git a/segmentation_models_pytorch/encoders/_inceptionv4.py b/segmentation_models_pytorch/encoders/_inceptionv4.py new file mode 100644 index 00000000..d48f7b77 --- /dev/null +++ b/segmentation_models_pytorch/encoders/_inceptionv4.py @@ -0,0 +1,358 @@ +from __future__ import print_function, 
division, absolute_import +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo +import os +import sys + +__all__ = ['InceptionV4', 'inceptionv4'] + +pretrained_settings = { + 'inceptionv4': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth', + 'input_space': 'RGB', + 'input_size': [3, 299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1000 + }, + 'imagenet+background': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth', + 'input_space': 'RGB', + 'input_size': [3, 299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1001 + } + } +} + + +class BasicConv2d(nn.Module): + + def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_planes, out_planes, + kernel_size=kernel_size, stride=stride, + padding=padding, bias=False) # verify bias false + self.bn = nn.BatchNorm2d(out_planes, + eps=0.001, # value found in tensorflow + momentum=0.1, # default pytorch value + affine=True) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Mixed_3a(nn.Module): + + def __init__(self): + super(Mixed_3a, self).__init__() + self.maxpool = nn.MaxPool2d(3, stride=2) + self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2) + + def forward(self, x): + x0 = self.maxpool(x) + x1 = self.conv(x) + out = torch.cat((x0, x1), 1) + return out + + +class Mixed_4a(nn.Module): + + def __init__(self): + super(Mixed_4a, self).__init__() + + self.branch0 = nn.Sequential( + BasicConv2d(160, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1) + ) + + self.branch1 = nn.Sequential( + BasicConv2d(160, 64, kernel_size=1, stride=1), + BasicConv2d(64, 64, kernel_size=(1,7), 
class Inception_A(nn.Module):
    """Four-branch Inception block on a 384-channel input.

    Every branch keeps the spatial size and emits 96 channels, so the
    concatenated output again has 384 channels.
    """

    def __init__(self):
        super().__init__()

        def conv1x1(out_planes):
            # Pointwise projection of the 384-channel block input.
            return BasicConv2d(384, out_planes, kernel_size=1, stride=1)

        def conv3x3(in_planes, out_planes):
            # Size-preserving 3x3 convolution.
            return BasicConv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1)

        self.branch0 = conv1x1(96)
        self.branch1 = nn.Sequential(conv1x1(64), conv3x3(64, 96))
        self.branch2 = nn.Sequential(conv1x1(64), conv3x3(64, 96), conv3x3(96, 96))
        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
            conv1x1(96),
        )

    def forward(self, x):
        """Run all branches on ``x`` and concatenate them along channels."""
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        return torch.cat([branch(x) for branch in branches], 1)
class Reduction_B(nn.Module):
    """Downsampling block between the Inception-B and Inception-C stages.

    Takes a 1024-channel input. Each branch ends in an unpadded stride-2
    3x3 operation, and the outputs are concatenated along channels
    (192 + 320 + 1024 = 1536).
    """

    def __init__(self):
        super().__init__()

        conv_path = [
            BasicConv2d(1024, 192, kernel_size=1, stride=1),
            BasicConv2d(192, 192, kernel_size=3, stride=2),
        ]
        factorized_path = [
            BasicConv2d(1024, 256, kernel_size=1, stride=1),
            # 7x7 receptive field factorised into a 1x7 and a 7x1 convolution
            BasicConv2d(256, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)),
            BasicConv2d(256, 320, kernel_size=(7, 1), stride=1, padding=(3, 0)),
            BasicConv2d(320, 320, kernel_size=3, stride=2),
        ]

        self.branch0 = nn.Sequential(*conv_path)
        self.branch1 = nn.Sequential(*factorized_path)
        self.branch2 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        """Concatenate the three downsampled branches along channels."""
        reduced = [self.branch0(x), self.branch1(x), self.branch2(x)]
        return torch.cat(reduced, 1)
class InceptionV4(nn.Module):
    """Inception-v4 classifier: a convolutional trunk plus a linear head.

    ``features`` is an ``nn.Sequential`` whose last stage emits 1536
    channels; ``last_linear`` maps the pooled 1536-d vector to
    ``num_classes`` scores.

    Args:
        num_classes: Number of outputs of ``last_linear``.
    """

    def __init__(self, num_classes=1001):
        super(InceptionV4, self).__init__()
        # Special attributes (overwritten by `inceptionv4()` when pretrained
        # settings are applied)
        self.input_space = None
        self.input_size = (299, 299, 3)
        self.mean = None
        self.std = None
        # Modules. The order and indices are part of the state_dict keys,
        # so they must not be rearranged.
        self.features = nn.Sequential(
            BasicConv2d(3, 32, kernel_size=3, stride=2),
            BasicConv2d(32, 32, kernel_size=3, stride=1),
            BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),
            Mixed_3a(),
            Mixed_4a(),
            Mixed_5a(),
            Inception_A(),
            Inception_A(),
            Inception_A(),
            Inception_A(),
            Reduction_A(),  # Mixed_6a
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Reduction_B(),  # Mixed_7a
            Inception_C(),
            Inception_C(),
            Inception_C()
        )
        self.last_linear = nn.Linear(1536, num_classes)

    def logits(self, features):
        """Pool a feature map globally and classify it.

        Args:
            features: Output of ``self.features``, with 1536 channels.

        Returns:
            Tensor of ``last_linear`` scores, one row per sample.
        """
        # Average over the whole spatial extent. The previous square kernel
        # sized from the height alone cropped wide feature maps and failed
        # on tall ones; for square maps the result is the same.
        x = F.adaptive_avg_pool2d(features, 1)
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

    def forward(self, input):
        """Run the trunk and the classification head on ``input``."""
        x = self.features(input)
        x = self.logits(x)
        return x
+1,442 @@ +""" +ResNet code gently borrowed from +https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py +""" +from __future__ import print_function, division, absolute_import +from collections import OrderedDict +import math + +import torch.nn as nn +from torch.utils import model_zoo + +__all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', + 'se_resnext50_32x4d', 'se_resnext101_32x4d'] + +pretrained_settings = { + 'senet154': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet50': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet101': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet152': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnext50_32x4d': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnext101_32x4d': { + 'imagenet': { + 'url': 
class SEModule(nn.Module):
    """Squeeze-and-Excitation gate that rescales the channels of its input.

    Args:
        channels: Number of channels of the input feature map.
        reduction: Divisor for the bottleneck width (``channels // reduction``).
    """

    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # The "fully connected" layers are 1x1 convolutions on the pooled map.
        self.fc1 = nn.Conv2d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel sigmoid gate."""
        gate = self.fc1(self.avg_pool(x))
        gate = self.fc2(self.relu(gate))
        return x * self.sigmoid(gate)
class SEResNetBottleneck(Bottleneck):
    """
    ResNet bottleneck block with a Squeeze-and-Excitation module.

    Following the Caffe implementation, the stride is applied in `conv1`
    rather than in `conv2` (torchvision's ResNet strides in `conv2`).
    The forward pass is inherited from `Bottleneck`.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super().__init__()
        widened = planes * 4

        # 1x1 reduce (carries the stride) -> 3x3 grouped -> 1x1 expand
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
                               stride=stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(widened, reduction=reduction)
        self.downsample = downsample
        self.stride = stride
class SENet(nn.Module):
    """Generic Squeeze-and-Excitation network (SENet154, SE-ResNet, SE-ResNeXt).

    The network is a stem (`layer0`), four residual stages
    (`layer1`..`layer4`), global average pooling, optional dropout and a
    linear classifier (`last_linear`).
    """

    def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
                 inplanes=128, input_3x3=True, downsample_kernel_size=3,
                 downsample_padding=1, num_classes=1000):
        """
        Parameters
        ----------
        block (nn.Module): Bottleneck class.
            - For SENet154: SEBottleneck
            - For SE-ResNet models: SEResNetBottleneck
            - For SE-ResNeXt models: SEResNeXtBottleneck
        layers (list of ints): Number of residual blocks for 4 layers of the
            network (layer1...layer4).
        groups (int): Number of groups for the 3x3 convolution in each
            bottleneck block.
            - For SENet154: 64
            - For SE-ResNet models: 1
            - For SE-ResNeXt models: 32
        reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
            - For all models: 16
        dropout_p (float or None): Drop probability for the Dropout layer.
            If `None` the Dropout layer is not used.
            - For SENet154: 0.2
            - For SE-ResNet models: None
            - For SE-ResNeXt models: None
        inplanes (int): Number of input channels for layer1.
            - For SENet154: 128
            - For SE-ResNet models: 64
            - For SE-ResNeXt models: 64
        input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
            a single 7x7 convolution in layer0.
            - For SENet154: True
            - For SE-ResNet models: False
            - For SE-ResNeXt models: False
        downsample_kernel_size (int): Kernel size for downsampling convolutions
            in layer2, layer3 and layer4.
            - For SENet154: 3
            - For SE-ResNet models: 1
            - For SE-ResNeXt models: 1
        downsample_padding (int): Padding for downsampling convolutions in
            layer2, layer3 and layer4.
            - For SENet154: 1
            - For SE-ResNet models: 0
            - For SE-ResNeXt models: 0
        num_classes (int): Number of outputs in `last_linear` layer.
            - For all models: 1000
        """
        super(SENet, self).__init__()
        # Running channel count; `_make_layer` updates it after every stage.
        self.inplanes = inplanes
        if input_3x3:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
                                    bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                    padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        # To preserve compatibility with Caffe weights `ceil_mode=True`
        # is used instead of `padding=1`.
        layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
                                                    ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
        # layer1 keeps the resolution, so its shortcut is always a plain
        # 1x1 convolution regardless of the downsample_* arguments.
        self.layer1 = self._make_layer(
            block,
            planes=64,
            blocks=layers[0],
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=1,
            downsample_padding=0
        )
        self.layer2 = self._make_layer(
            block,
            planes=128,
            blocks=layers[1],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer3 = self._make_layer(
            block,
            planes=256,
            blocks=layers[2],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer4 = self._make_layer(
            block,
            planes=512,
            blocks=layers[3],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        # Global pooling. A fixed 7x7 window only fits the 7x7 map produced
        # by 224x224 inputs; adaptive pooling gives the same result there and
        # also supports other resolutions. It has no parameters, so
        # checkpoints are unaffected.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        """Build one residual stage of `blocks` bottlenecks.

        Only the first bottleneck receives `stride` and the projection
        shortcut; the rest run at stride 1 on `planes * block.expansion`
        channels. Updates `self.inplanes` to the stage's output width.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Projection shortcut so the residual matches the block output.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, groups, reduction, stride,
                            downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups, reduction))

        return nn.Sequential(*layers)

    def features(self, x):
        """Run the stem and the four residual stages; return layer4's output."""
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def logits(self, x):
        """Pool a feature map, apply optional dropout and classify it."""
        x = self.avg_pool(x)
        if self.dropout is not None:
            x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

    def forward(self, x):
        """Return class scores for a batch of images."""
        x = self.features(x)
        x = self.logits(x)
        return x
def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet'):
    """Build SE-ResNeXt-101 (32x4d), optionally loading pretrained weights.

    Args:
        num_classes: Number of outputs of the classifier.
        pretrained: Key into ``pretrained_settings['se_resnext101_32x4d']``,
            or ``None`` to leave the network randomly initialised.

    Returns:
        The constructed ``SENet``.
    """
    architecture = dict(
        groups=32,
        reduction=16,
        dropout_p=None,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        num_classes=num_classes,
    )
    model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], **architecture)
    if pretrained is None:
        return model

    settings = pretrained_settings['se_resnext101_32x4d'][pretrained]
    initialize_pretrained_model(model, num_classes, settings)
    return model
class SeparableConv2d(nn.Module):
    """Depthwise-separable convolution: per-channel conv, then a 1x1 conv.

    Args:
        in_channels: Number of input channels (also the depthwise groups).
        out_channels: Number of channels produced by the pointwise conv.
        kernel_size, stride, padding, dilation: Passed to the depthwise conv.
        bias: Whether both convolutions carry a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
                 padding=0, dilation=1, bias=False):
        super().__init__()

        # Depthwise step: one filter per input channel.
        self.conv1 = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            bias=bias,
        )
        # Pointwise step: 1x1 convolution that mixes channels.
        self.pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=bias,
        )

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        return self.pointwise(self.conv1(x))
class Xception(nn.Module):
    """
    Xception optimized for the ImageNet dataset, as specified in
    https://arxiv.org/pdf/1610.02357.pdf

    The classifier is created as `fc`. The `xception()` factory renames it
    to `last_linear` after construction; `logits` accepts either name.
    """
    def __init__(self, num_classes=1000):
        """ Constructor
        Args:
            num_classes: number of classes
        """
        super(Xception, self).__init__()
        self.num_classes = num_classes

        # Stem: two unpadded 3x3 convolutions, the first with stride 2.
        self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32, 64, 3, bias=False)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)

        # Strided blocks that widen the features to 728 channels.
        self.block1 = Block(64, 128, 2, 2, start_with_relu=False, grow_first=True)
        self.block2 = Block(128, 256, 2, 2, start_with_relu=True, grow_first=True)
        self.block3 = Block(256, 728, 2, 2, start_with_relu=True, grow_first=True)

        # Eight identical stride-1 blocks at 728 channels.
        self.block4 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block5 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block6 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block7 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)

        self.block8 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block9 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block10 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block11 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)

        # Final strided block, then two separable convolutions up to 2048.
        self.block12 = Block(728, 1024, 2, 2, start_with_relu=True, grow_first=False)

        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(1536)
        self.relu3 = nn.ReLU(inplace=True)

        # No ReLU module follows bn4; `logits` applies it.
        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(2048)

        self.fc = nn.Linear(2048, num_classes)

    def features(self, input):
        """Return the 2048-channel feature map after `bn4` (no final ReLU)."""
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.block9(x)
        x = self.block10(x)
        x = self.block11(x)
        x = self.block12(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu3(x)

        x = self.conv4(x)
        x = self.bn4(x)
        return x

    def logits(self, features):
        """Apply ReLU, global average pooling and the classifier.

        Args:
            features: Output of `features`. The ReLU is applied in place,
                so the tensor passed in is modified.

        Returns:
            Class scores, one row per sample.
        """
        x = nn.ReLU(inplace=True)(features)

        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        # `__init__` only creates `fc`. Without this fallback a directly
        # constructed Xception raised AttributeError here, because only the
        # `xception()` factory installs `last_linear`.
        classifier = getattr(self, 'last_linear', None)
        if classifier is None:
            classifier = self.fc
        x = classifier(x)
        return x

    def forward(self, input):
        """Return class scores for a batch of images."""
        x = self.features(input)
        x = self.logits(x)
        return x
but is {}".format(settings['num_classes'], num_classes) + + model = Xception(num_classes=num_classes) + model.load_state_dict(model_zoo.load_url(settings['url'])) + + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + + # TODO: ugly + model.last_linear = model.fc + del model.fc + return model diff --git a/segmentation_models_pytorch/encoders/dpn.py b/segmentation_models_pytorch/encoders/dpn.py index 4fe84328..b5226d4d 100644 --- a/segmentation_models_pytorch/encoders/dpn.py +++ b/segmentation_models_pytorch/encoders/dpn.py @@ -27,9 +27,8 @@ import torch.nn.functional as F from typing import List, Dict, Sequence -from pretrainedmodels.models.dpn import DPN - from ._base import EncoderMixin +from ._dpn import DPN class DPNEncoder(DPN, EncoderMixin): diff --git a/segmentation_models_pytorch/encoders/inceptionresnetv2.py b/segmentation_models_pytorch/encoders/inceptionresnetv2.py index 15bf6502..d7f83f9d 100644 --- a/segmentation_models_pytorch/encoders/inceptionresnetv2.py +++ b/segmentation_models_pytorch/encoders/inceptionresnetv2.py @@ -26,9 +26,9 @@ import torch import torch.nn as nn from typing import List -from pretrainedmodels.models.inceptionresnetv2 import InceptionResNetV2 from ._base import EncoderMixin +from ._inceptionresnetv2 import InceptionResNetV2 class InceptionResNetV2Encoder(InceptionResNetV2, EncoderMixin): diff --git a/segmentation_models_pytorch/encoders/inceptionv4.py b/segmentation_models_pytorch/encoders/inceptionv4.py index 12a7cc1b..3c335042 100644 --- a/segmentation_models_pytorch/encoders/inceptionv4.py +++ b/segmentation_models_pytorch/encoders/inceptionv4.py @@ -27,9 +27,9 @@ import torch.nn as nn from typing import List -from pretrainedmodels.models.inceptionv4 import InceptionV4 from ._base import EncoderMixin +from ._inceptionv4 import InceptionV4 class InceptionV4Encoder(InceptionV4, 
EncoderMixin): diff --git a/segmentation_models_pytorch/encoders/senet.py b/segmentation_models_pytorch/encoders/senet.py index 18dbfd91..03cf0820 100644 --- a/segmentation_models_pytorch/encoders/senet.py +++ b/segmentation_models_pytorch/encoders/senet.py @@ -26,13 +26,13 @@ import torch from typing import List, Dict, Sequence -from pretrainedmodels.models.senet import ( +from ._base import EncoderMixin +from ._senet import ( SENet, SEBottleneck, SEResNetBottleneck, SEResNeXtBottleneck, ) -from ._base import EncoderMixin class SENetEncoder(SENet, EncoderMixin): @@ -97,205 +97,3 @@ def load_state_dict(self, state_dict, **kwargs): state_dict.pop("last_linear.bias", None) state_dict.pop("last_linear.weight", None) super().load_state_dict(state_dict, **kwargs) - - -pretrained_settings = { - "senet154": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "se_resnet50": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "se_resnet101": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "se_resnet152": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "se_resnext50_32x4d": { - 
"imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "se_resnext101_32x4d": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, -} - - -senet_encoders = { - "senet154": { - "encoder": SENetEncoder, - "pretrained_settings": { - "imagenet": { - "repo_id": "smp-hub/senet154.imagenet", - "revision": "249f45efc9881ba560a0c480128edbc34ab87e40", - } - }, - "params": { - "out_channels": [3, 128, 256, 512, 1024, 2048], - "block": SEBottleneck, - "dropout_p": 0.2, - "groups": 64, - "layers": [3, 8, 36, 3], - "num_classes": 1000, - "reduction": 16, - }, - }, - "se_resnet50": { - "encoder": SENetEncoder, - "pretrained_settings": { - "imagenet": { - "repo_id": "smp-hub/se_resnet50.imagenet", - "revision": "e6b4bc2dc85226c3d3474544410724a485455459", - } - }, - "params": { - "out_channels": [3, 64, 256, 512, 1024, 2048], - "block": SEResNetBottleneck, - "layers": [3, 4, 6, 3], - "downsample_kernel_size": 1, - "downsample_padding": 0, - "dropout_p": None, - "groups": 1, - "inplanes": 64, - "input_3x3": False, - "num_classes": 1000, - "reduction": 16, - }, - }, - "se_resnet101": { - "encoder": SENetEncoder, - "pretrained_settings": { - "imagenet": { - "repo_id": "smp-hub/se_resnet101.imagenet", - "revision": "71fe95cc0a27f444cf83671f354de02dc741b18b", - } - }, - "params": { - "out_channels": [3, 64, 256, 512, 1024, 2048], - "block": SEResNetBottleneck, - "layers": [3, 4, 23, 3], - "downsample_kernel_size": 1, - "downsample_padding": 0, - "dropout_p": None, - "groups": 1, - "inplanes": 64, - "input_3x3": False, - "num_classes": 
1000, - "reduction": 16, - }, - }, - "se_resnet152": { - "encoder": SENetEncoder, - "pretrained_settings": { - "imagenet": { - "repo_id": "smp-hub/se_resnet152.imagenet", - "revision": "e79fc3d9d76f197bd76a2593c2054edf1083fe32", - } - }, - "params": { - "out_channels": [3, 64, 256, 512, 1024, 2048], - "block": SEResNetBottleneck, - "layers": [3, 8, 36, 3], - "downsample_kernel_size": 1, - "downsample_padding": 0, - "dropout_p": None, - "groups": 1, - "inplanes": 64, - "input_3x3": False, - "num_classes": 1000, - "reduction": 16, - }, - }, - "se_resnext50_32x4d": { - "encoder": SENetEncoder, - "pretrained_settings": { - "imagenet": { - "repo_id": "smp-hub/se_resnext50_32x4d.imagenet", - "revision": "73246406d879a2b0e3fdfe6fddd56347d38f38ae", - } - }, - "params": { - "out_channels": [3, 64, 256, 512, 1024, 2048], - "block": SEResNeXtBottleneck, - "layers": [3, 4, 6, 3], - "downsample_kernel_size": 1, - "downsample_padding": 0, - "dropout_p": None, - "groups": 32, - "inplanes": 64, - "input_3x3": False, - "num_classes": 1000, - "reduction": 16, - }, - }, - "se_resnext101_32x4d": { - "encoder": SENetEncoder, - "pretrained_settings": { - "imagenet": { - "repo_id": "smp-hub/se_resnext101_32x4d.imagenet", - "revision": "18808a4276f46421d358a9de554e0b93c2795df4", - } - }, - "params": { - "out_channels": [3, 64, 256, 512, 1024, 2048], - "block": SEResNeXtBottleneck, - "layers": [3, 4, 23, 3], - "downsample_kernel_size": 1, - "downsample_padding": 0, - "dropout_p": None, - "groups": 32, - "inplanes": 64, - "input_3x3": False, - "num_classes": 1000, - "reduction": 16, - }, - }, -} diff --git a/segmentation_models_pytorch/encoders/xception.py b/segmentation_models_pytorch/encoders/xception.py index 594636a4..5ed70c70 100644 --- a/segmentation_models_pytorch/encoders/xception.py +++ b/segmentation_models_pytorch/encoders/xception.py @@ -1,7 +1,7 @@ from typing import List -from pretrainedmodels.models.xception import Xception from ._base import EncoderMixin +from ._xception 
import Xception class XceptionEncoder(Xception, EncoderMixin):