From 0da5651c74ad0b62159e3344a970d43bc2440aac Mon Sep 17 00:00:00 2001
From: "Adam J. Stewart" <ajstewart426@gmail.com>
Date: Sat, 18 Jan 2025 15:54:52 +0100
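Subject: [PATCH] Apply ruff formatting and lint fixes to encoder modules

Normalize string quoting, line wrapping and trailing commas, drop unused
imports, and replace `is` comparisons against string literals with `==`.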

---
 segmentation_models_pytorch/encoders/_dpn.py  | 569 +++++++++++-------
 .../encoders/_inceptionresnetv2.py            | 147 ++---
 .../encoders/_inceptionv4.py                  | 187 +++---
 .../encoders/_senet.py                        | 416 ++++++++-----
 .../encoders/_xception.py                     | 166 +++--
 segmentation_models_pytorch/encoders/senet.py |   3 -
 6 files changed, 892 insertions(+), 596 deletions(-)

diff --git a/segmentation_models_pytorch/encoders/_dpn.py b/segmentation_models_pytorch/encoders/_dpn.py
index 5ab965ca..db3cb29a 100644
--- a/segmentation_models_pytorch/encoders/_dpn.py
+++ b/segmentation_models_pytorch/encoders/_dpn.py
@@ -1,4 +1,4 @@
-""" PyTorch implementation of DualPathNetworks
+"""PyTorch implementation of DualPathNetworks
 Ported to PyTorch by [Ross Wightman](https://github.com/rwightman/pytorch-dpn-pretrained)
 
 Based on original MXNet implementation https://github.com/cypw/DPNs with
@@ -7,39 +7,39 @@
 This implementation is compatible with the pretrained weights
 from cypw's MXNet implementation.
 """
-import os
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.utils.model_zoo as model_zoo
 from collections import OrderedDict
 
-__all__ = ['DPN', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107']
+__all__ = ["DPN", "dpn68", "dpn68b", "dpn92", "dpn98", "dpn131", "dpn107"]
 
 pretrained_settings = {
-    'dpn68': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn68-4af7d88d2.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [124 / 255, 117 / 255, 104 / 255],
-            'std': [1 / (.0167 * 255)] * 3,
-            'num_classes': 1000
+    "dpn68": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn68-4af7d88d2.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [124 / 255, 117 / 255, 104 / 255],
+            "std": [1 / (0.0167 * 255)] * 3,
+            "num_classes": 1000,
         }
     },
-    'dpn68b': {
-        'imagenet+5k': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn68b_extra-363ab9c19.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [124 / 255, 117 / 255, 104 / 255],
-            'std': [1 / (.0167 * 255)] * 3,
-            'num_classes': 1000
+    "dpn68b": {
+        "imagenet+5k": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn68b_extra-363ab9c19.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [124 / 255, 117 / 255, 104 / 255],
+            "std": [1 / (0.0167 * 255)] * 3,
+            "num_classes": 1000,
         }
     },
-    'dpn92': {
+    "dpn92": {
         # 'imagenet': {
         #     'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn68-66bebafa7.pth',
         #     'input_space': 'RGB',
@@ -49,157 +49,214 @@
         #     'std': [1 / (.0167 * 255)] * 3,
         #     'num_classes': 1000
         # },
-        'imagenet+5k': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn92_extra-fda993c95.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [124 / 255, 117 / 255, 104 / 255],
-            'std': [1 / (.0167 * 255)] * 3,
-            'num_classes': 1000
+        "imagenet+5k": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn92_extra-fda993c95.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [124 / 255, 117 / 255, 104 / 255],
+            "std": [1 / (0.0167 * 255)] * 3,
+            "num_classes": 1000,
         }
     },
-    'dpn98': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn98-722954780.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [124 / 255, 117 / 255, 104 / 255],
-            'std': [1 / (.0167 * 255)] * 3,
-            'num_classes': 1000
+    "dpn98": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn98-722954780.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [124 / 255, 117 / 255, 104 / 255],
+            "std": [1 / (0.0167 * 255)] * 3,
+            "num_classes": 1000,
         }
     },
-    'dpn131': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn131-7af84be88.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [124 / 255, 117 / 255, 104 / 255],
-            'std': [1 / (.0167 * 255)] * 3,
-            'num_classes': 1000
+    "dpn131": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn131-7af84be88.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [124 / 255, 117 / 255, 104 / 255],
+            "std": [1 / (0.0167 * 255)] * 3,
+            "num_classes": 1000,
         }
     },
-    'dpn107': {
-        'imagenet+5k': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/dpn107_extra-b7f9f4cc9.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [124 / 255, 117 / 255, 104 / 255],
-            'std': [1 / (.0167 * 255)] * 3,
-            'num_classes': 1000
+    "dpn107": {
+        "imagenet+5k": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn107_extra-b7f9f4cc9.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [124 / 255, 117 / 255, 104 / 255],
+            "std": [1 / (0.0167 * 255)] * 3,
+            "num_classes": 1000,
         }
-    }
+    },
 }
 
-def dpn68(num_classes=1000, pretrained='imagenet'):
+
+def dpn68(num_classes=1000, pretrained="imagenet"):
     model = DPN(
-        small=True, num_init_features=10, k_r=128, groups=32,
-        k_sec=(3, 4, 12, 3), inc_sec=(16, 32, 32, 64),
-        num_classes=num_classes, test_time_pool=True)
+        small=True,
+        num_init_features=10,
+        k_r=128,
+        groups=32,
+        k_sec=(3, 4, 12, 3),
+        inc_sec=(16, 32, 32, 64),
+        num_classes=num_classes,
+        test_time_pool=True,
+    )
     if pretrained:
-        settings = pretrained_settings['dpn68'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
-
-        model.load_state_dict(model_zoo.load_url(settings['url']))
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        settings = pretrained_settings["dpn68"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
+
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     return model
 
-def dpn68b(num_classes=1000, pretrained='imagenet+5k'):
+
+def dpn68b(num_classes=1000, pretrained="imagenet+5k"):
     model = DPN(
-        small=True, num_init_features=10, k_r=128, groups=32,
-        b=True, k_sec=(3, 4, 12, 3), inc_sec=(16, 32, 32, 64),
-        num_classes=num_classes, test_time_pool=True)
+        small=True,
+        num_init_features=10,
+        k_r=128,
+        groups=32,
+        b=True,
+        k_sec=(3, 4, 12, 3),
+        inc_sec=(16, 32, 32, 64),
+        num_classes=num_classes,
+        test_time_pool=True,
+    )
     if pretrained:
-        settings = pretrained_settings['dpn68b'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
-
-        model.load_state_dict(model_zoo.load_url(settings['url']))
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        settings = pretrained_settings["dpn68b"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
+
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     return model
 
-def dpn92(num_classes=1000, pretrained='imagenet+5k'):
+
+def dpn92(num_classes=1000, pretrained="imagenet+5k"):
     model = DPN(
-        num_init_features=64, k_r=96, groups=32,
-        k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128),
-        num_classes=num_classes, test_time_pool=True)
+        num_init_features=64,
+        k_r=96,
+        groups=32,
+        k_sec=(3, 4, 20, 3),
+        inc_sec=(16, 32, 24, 128),
+        num_classes=num_classes,
+        test_time_pool=True,
+    )
     if pretrained:
-        settings = pretrained_settings['dpn92'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
-
-        model.load_state_dict(model_zoo.load_url(settings['url']))
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        settings = pretrained_settings["dpn92"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
+
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     return model
 
-def dpn98(num_classes=1000, pretrained='imagenet'):
+
+def dpn98(num_classes=1000, pretrained="imagenet"):
     model = DPN(
-        num_init_features=96, k_r=160, groups=40,
-        k_sec=(3, 6, 20, 3), inc_sec=(16, 32, 32, 128),
-        num_classes=num_classes, test_time_pool=True)
+        num_init_features=96,
+        k_r=160,
+        groups=40,
+        k_sec=(3, 6, 20, 3),
+        inc_sec=(16, 32, 32, 128),
+        num_classes=num_classes,
+        test_time_pool=True,
+    )
     if pretrained:
-        settings = pretrained_settings['dpn98'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
-
-        model.load_state_dict(model_zoo.load_url(settings['url']))
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        settings = pretrained_settings["dpn98"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
+
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     return model
 
-def dpn131(num_classes=1000, pretrained='imagenet'):
+
+def dpn131(num_classes=1000, pretrained="imagenet"):
     model = DPN(
-        num_init_features=128, k_r=160, groups=40,
-        k_sec=(4, 8, 28, 3), inc_sec=(16, 32, 32, 128),
-        num_classes=num_classes, test_time_pool=True)
+        num_init_features=128,
+        k_r=160,
+        groups=40,
+        k_sec=(4, 8, 28, 3),
+        inc_sec=(16, 32, 32, 128),
+        num_classes=num_classes,
+        test_time_pool=True,
+    )
     if pretrained:
-        settings = pretrained_settings['dpn131'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
-
-        model.load_state_dict(model_zoo.load_url(settings['url']))
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        settings = pretrained_settings["dpn131"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
+
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     return model
 
-def dpn107(num_classes=1000, pretrained='imagenet+5k'):
+
+def dpn107(num_classes=1000, pretrained="imagenet+5k"):
     model = DPN(
-        num_init_features=128, k_r=200, groups=50,
-        k_sec=(4, 8, 20, 3), inc_sec=(20, 64, 64, 128),
-        num_classes=num_classes, test_time_pool=True)
+        num_init_features=128,
+        k_r=200,
+        groups=50,
+        k_sec=(4, 8, 20, 3),
+        inc_sec=(20, 64, 64, 128),
+        num_classes=num_classes,
+        test_time_pool=True,
+    )
     if pretrained:
-        settings = pretrained_settings['dpn107'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
-
-        model.load_state_dict(model_zoo.load_url(settings['url']))
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        settings = pretrained_settings["dpn107"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
+
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     return model
 
 
@@ -215,23 +272,44 @@ def forward(self, x):
 
 
 class BnActConv2d(nn.Module):
-    def __init__(self, in_chs, out_chs, kernel_size, stride,
-                 padding=0, groups=1, activation_fn=nn.ReLU(inplace=True)):
+    def __init__(
+        self,
+        in_chs,
+        out_chs,
+        kernel_size,
+        stride,
+        padding=0,
+        groups=1,
+        activation_fn=nn.ReLU(inplace=True),
+    ):
         super(BnActConv2d, self).__init__()
         self.bn = nn.BatchNorm2d(in_chs, eps=0.001)
         self.act = activation_fn
-        self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, padding, groups=groups, bias=False)
+        self.conv = nn.Conv2d(
+            in_chs, out_chs, kernel_size, stride, padding, groups=groups, bias=False
+        )
 
     def forward(self, x):
         return self.conv(self.act(self.bn(x)))
 
 
 class InputBlock(nn.Module):
-    def __init__(self, num_init_features, kernel_size=7,
-                 padding=3, activation_fn=nn.ReLU(inplace=True)):
+    def __init__(
+        self,
+        num_init_features,
+        kernel_size=7,
+        padding=3,
+        activation_fn=nn.ReLU(inplace=True),
+    ):
         super(InputBlock, self).__init__()
         self.conv = nn.Conv2d(
-            3, num_init_features, kernel_size=kernel_size, stride=2, padding=padding, bias=False)
+            3,
+            num_init_features,
+            kernel_size=kernel_size,
+            stride=2,
+            padding=padding,
+            bias=False,
+        )
         self.bn = nn.BatchNorm2d(num_init_features, eps=0.001)
         self.act = activation_fn
         self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
@@ -246,19 +324,28 @@ def forward(self, x):
 
 class DualPathBlock(nn.Module):
     def __init__(
-            self, in_chs, num_1x1_a, num_3x3_b, num_1x1_c, inc, groups, block_type='normal', b=False):
+        self,
+        in_chs,
+        num_1x1_a,
+        num_3x3_b,
+        num_1x1_c,
+        inc,
+        groups,
+        block_type="normal",
+        b=False,
+    ):
         super(DualPathBlock, self).__init__()
         self.num_1x1_c = num_1x1_c
         self.inc = inc
         self.b = b
-        if block_type is 'proj':
+        if block_type == "proj":
             self.key_stride = 1
             self.has_proj = True
-        elif block_type is 'down':
+        elif block_type == "down":
             self.key_stride = 2
             self.has_proj = True
         else:
-            assert block_type is 'normal'
+            assert block_type == "normal"
             self.key_stride = 1
             self.has_proj = False
 
@@ -266,20 +353,31 @@ def __init__(
             # Using different member names here to allow easier parameter key matching for conversion
             if self.key_stride == 2:
                 self.c1x1_w_s2 = BnActConv2d(
-                    in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=2)
+                    in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=2
+                )
             else:
                 self.c1x1_w_s1 = BnActConv2d(
-                    in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=1)
-        self.c1x1_a = BnActConv2d(in_chs=in_chs, out_chs=num_1x1_a, kernel_size=1, stride=1)
+                    in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=1
+                )
+        self.c1x1_a = BnActConv2d(
+            in_chs=in_chs, out_chs=num_1x1_a, kernel_size=1, stride=1
+        )
         self.c3x3_b = BnActConv2d(
-            in_chs=num_1x1_a, out_chs=num_3x3_b, kernel_size=3,
-            stride=self.key_stride, padding=1, groups=groups)
+            in_chs=num_1x1_a,
+            out_chs=num_3x3_b,
+            kernel_size=3,
+            stride=self.key_stride,
+            padding=1,
+            groups=groups,
+        )
         if b:
             self.c1x1_c = CatBnAct(in_chs=num_3x3_b)
             self.c1x1_c1 = nn.Conv2d(num_3x3_b, num_1x1_c, kernel_size=1, bias=False)
             self.c1x1_c2 = nn.Conv2d(num_3x3_b, inc, kernel_size=1, bias=False)
         else:
-            self.c1x1_c = BnActConv2d(in_chs=num_3x3_b, out_chs=num_1x1_c + inc, kernel_size=1, stride=1)
+            self.c1x1_c = BnActConv2d(
+                in_chs=num_3x3_b, out_chs=num_1x1_c + inc, kernel_size=1, stride=1
+            )
 
     def forward(self, x):
         x_in = torch.cat(x, dim=1) if isinstance(x, tuple) else x
@@ -288,8 +386,8 @@ def forward(self, x):
                 x_s = self.c1x1_w_s2(x_in)
             else:
                 x_s = self.c1x1_w_s1(x_in)
-            x_s1 = x_s[:, :self.num_1x1_c, :, :]
-            x_s2 = x_s[:, self.num_1x1_c:, :, :]
+            x_s1 = x_s[:, : self.num_1x1_c, :, :]
+            x_s2 = x_s[:, self.num_1x1_c :, :, :]
         else:
             x_s1 = x[0]
             x_s2 = x[1]
@@ -301,17 +399,26 @@ def forward(self, x):
             out2 = self.c1x1_c2(x_in)
         else:
             x_in = self.c1x1_c(x_in)
-            out1 = x_in[:, :self.num_1x1_c, :, :]
-            out2 = x_in[:, self.num_1x1_c:, :, :]
+            out1 = x_in[:, : self.num_1x1_c, :, :]
+            out2 = x_in[:, self.num_1x1_c :, :, :]
         resid = x_s1 + out1
         dense = torch.cat([x_s2, out2], dim=1)
         return resid, dense
 
 
 class DPN(nn.Module):
-    def __init__(self, small=False, num_init_features=64, k_r=96, groups=32,
-                 b=False, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128),
-                 num_classes=1000, test_time_pool=False):
+    def __init__(
+        self,
+        small=False,
+        num_init_features=64,
+        k_r=96,
+        groups=32,
+        b=False,
+        k_sec=(3, 4, 20, 3),
+        inc_sec=(16, 32, 24, 128),
+        num_classes=1000,
+        test_time_pool=False,
+    ):
         super(DPN, self).__init__()
         self.test_time_pool = test_time_pool
         self.b = b
@@ -321,50 +428,60 @@ def __init__(self, small=False, num_init_features=64, k_r=96, groups=32,
 
         # conv1
         if small:
-            blocks['conv1_1'] = InputBlock(num_init_features, kernel_size=3, padding=1)
+            blocks["conv1_1"] = InputBlock(num_init_features, kernel_size=3, padding=1)
         else:
-            blocks['conv1_1'] = InputBlock(num_init_features, kernel_size=7, padding=3)
+            blocks["conv1_1"] = InputBlock(num_init_features, kernel_size=7, padding=3)
 
         # conv2
         bw = 64 * bw_factor
         inc = inc_sec[0]
         r = (k_r * bw) // (64 * bw_factor)
-        blocks['conv2_1'] = DualPathBlock(num_init_features, r, r, bw, inc, groups, 'proj', b)
+        blocks["conv2_1"] = DualPathBlock(
+            num_init_features, r, r, bw, inc, groups, "proj", b
+        )
         in_chs = bw + 3 * inc
         for i in range(2, k_sec[0] + 1):
-            blocks['conv2_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b)
+            blocks["conv2_" + str(i)] = DualPathBlock(
+                in_chs, r, r, bw, inc, groups, "normal", b
+            )
             in_chs += inc
 
         # conv3
         bw = 128 * bw_factor
         inc = inc_sec[1]
         r = (k_r * bw) // (64 * bw_factor)
-        blocks['conv3_1'] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'down', b)
+        blocks["conv3_1"] = DualPathBlock(in_chs, r, r, bw, inc, groups, "down", b)
         in_chs = bw + 3 * inc
         for i in range(2, k_sec[1] + 1):
-            blocks['conv3_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b)
+            blocks["conv3_" + str(i)] = DualPathBlock(
+                in_chs, r, r, bw, inc, groups, "normal", b
+            )
             in_chs += inc
 
         # conv4
         bw = 256 * bw_factor
         inc = inc_sec[2]
         r = (k_r * bw) // (64 * bw_factor)
-        blocks['conv4_1'] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'down', b)
+        blocks["conv4_1"] = DualPathBlock(in_chs, r, r, bw, inc, groups, "down", b)
         in_chs = bw + 3 * inc
         for i in range(2, k_sec[2] + 1):
-            blocks['conv4_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b)
+            blocks["conv4_" + str(i)] = DualPathBlock(
+                in_chs, r, r, bw, inc, groups, "normal", b
+            )
             in_chs += inc
 
         # conv5
         bw = 512 * bw_factor
         inc = inc_sec[3]
         r = (k_r * bw) // (64 * bw_factor)
-        blocks['conv5_1'] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'down', b)
+        blocks["conv5_1"] = DualPathBlock(in_chs, r, r, bw, inc, groups, "down", b)
         in_chs = bw + 3 * inc
         for i in range(2, k_sec[3] + 1):
-            blocks['conv5_' + str(i)] = DualPathBlock(in_chs, r, r, bw, inc, groups, 'normal', b)
+            blocks["conv5_" + str(i)] = DualPathBlock(
+                in_chs, r, r, bw, inc, groups, "normal", b
+            )
             in_chs += inc
-        blocks['conv5_bn_ac'] = CatBnAct(in_chs)
+        blocks["conv5_bn_ac"] = CatBnAct(in_chs)
 
         self.features = nn.Sequential(blocks)
 
@@ -376,9 +493,9 @@ def logits(self, features):
             x = F.avg_pool2d(features, kernel_size=7, stride=1)
             out = self.last_linear(x)
             # The extra test time pool should be pooling an img_size//32 - 6 size patch
-            out = adaptive_avgmax_pool2d(out, pool_type='avgmax')
+            out = adaptive_avgmax_pool2d(out, pool_type="avgmax")
         else:
-            x = adaptive_avgmax_pool2d(features, pool_type='avg')
+            x = adaptive_avgmax_pool2d(features, pool_type="avg")
             out = self.last_linear(x)
         return out.view(out.size(0), -1)
 
@@ -387,6 +504,7 @@ def forward(self, input):
         x = self.logits(x)
         return x
 
+
 """ PyTorch selectable adaptive pooling
 Adaptive pooling with the ability to select the type of pooling from:
     * 'avg' - Average pooling
@@ -399,55 +517,80 @@ def forward(self, input):
 Author: Ross Wightman (rwightman)
 """
 
-def pooling_factor(pool_type='avg'):
-    return 2 if pool_type == 'avgmaxc' else 1
-
 
-def adaptive_avgmax_pool2d(x, pool_type='avg', padding=0, count_include_pad=False):
-    """Selectable global pooling function with dynamic input kernel size
-    """
-    if pool_type == 'avgmaxc':
-        x = torch.cat([
-            F.avg_pool2d(
-                x, kernel_size=(x.size(2), x.size(3)), padding=padding, count_include_pad=count_include_pad),
-            F.max_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=padding)
-        ], dim=1)
-    elif pool_type == 'avgmax':
+def pooling_factor(pool_type="avg"):
+    return 2 if pool_type == "avgmaxc" else 1
+
+
+def adaptive_avgmax_pool2d(x, pool_type="avg", padding=0, count_include_pad=False):
+    """Selectable global pooling function with dynamic input kernel size"""
+    if pool_type == "avgmaxc":
+        x = torch.cat(
+            [
+                F.avg_pool2d(
+                    x,
+                    kernel_size=(x.size(2), x.size(3)),
+                    padding=padding,
+                    count_include_pad=count_include_pad,
+                ),
+                F.max_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=padding),
+            ],
+            dim=1,
+        )
+    elif pool_type == "avgmax":
         x_avg = F.avg_pool2d(
-                x, kernel_size=(x.size(2), x.size(3)), padding=padding, count_include_pad=count_include_pad)
+            x,
+            kernel_size=(x.size(2), x.size(3)),
+            padding=padding,
+            count_include_pad=count_include_pad,
+        )
         x_max = F.max_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=padding)
         x = 0.5 * (x_avg + x_max)
-    elif pool_type == 'max':
+    elif pool_type == "max":
         x = F.max_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=padding)
     else:
-        if pool_type != 'avg':
-            print('Invalid pool type %s specified. Defaulting to average pooling.' % pool_type)
+        if pool_type != "avg":
+            print(
+                "Invalid pool type %s specified. Defaulting to average pooling."
+                % pool_type
+            )
         x = F.avg_pool2d(
-            x, kernel_size=(x.size(2), x.size(3)), padding=padding, count_include_pad=count_include_pad)
+            x,
+            kernel_size=(x.size(2), x.size(3)),
+            padding=padding,
+            count_include_pad=count_include_pad,
+        )
     return x
 
 
 class AdaptiveAvgMaxPool2d(torch.nn.Module):
-    """Selectable global pooling layer with dynamic input kernel size
-    """
-    def __init__(self, output_size=1, pool_type='avg'):
+    """Selectable global pooling layer with dynamic input kernel size"""
+
+    def __init__(self, output_size=1, pool_type="avg"):
         super(AdaptiveAvgMaxPool2d, self).__init__()
         self.output_size = output_size
         self.pool_type = pool_type
-        if pool_type == 'avgmaxc' or pool_type == 'avgmax':
-            self.pool = nn.ModuleList([nn.AdaptiveAvgPool2d(output_size), nn.AdaptiveMaxPool2d(output_size)])
-        elif pool_type == 'max':
+        if pool_type == "avgmaxc" or pool_type == "avgmax":
+            self.pool = nn.ModuleList(
+                [nn.AdaptiveAvgPool2d(output_size), nn.AdaptiveMaxPool2d(output_size)]
+            )
+        elif pool_type == "max":
             self.pool = nn.AdaptiveMaxPool2d(output_size)
         else:
-            if pool_type != 'avg':
-                print('Invalid pool type %s specified. Defaulting to average pooling.' % pool_type)
+            if pool_type != "avg":
+                print(
+                    "Invalid pool type %s specified. Defaulting to average pooling."
+                    % pool_type
+                )
             self.pool = nn.AdaptiveAvgPool2d(output_size)
 
     def forward(self, x):
-        if self.pool_type == 'avgmaxc':
+        if self.pool_type == "avgmaxc":
             x = torch.cat([p(x) for p in self.pool], dim=1)
-        elif self.pool_type == 'avgmax':
-            x = 0.5 * torch.sum(torch.stack([p(x) for p in self.pool]), 0).squeeze(dim=0)
+        elif self.pool_type == "avgmax":
+            x = 0.5 * torch.sum(torch.stack([p(x) for p in self.pool]), 0).squeeze(
+                dim=0
+            )
         else:
             x = self.pool(x)
         return x
@@ -456,6 +599,12 @@ def factor(self):
         return pooling_factor(self.pool_type)
 
     def __repr__(self):
-        return self.__class__.__name__ + ' (' \
-               + 'output_size=' + str(self.output_size) \
-               + ', pool_type=' + self.pool_type + ')'
+        return (
+            self.__class__.__name__
+            + " ("
+            + "output_size="
+            + str(self.output_size)
+            + ", pool_type="
+            + self.pool_type
+            + ")"
+        )
diff --git a/segmentation_models_pytorch/encoders/_inceptionresnetv2.py b/segmentation_models_pytorch/encoders/_inceptionresnetv2.py
index 8f55bb0b..425d4261 100644
--- a/segmentation_models_pytorch/encoders/_inceptionresnetv2.py
+++ b/segmentation_models_pytorch/encoders/_inceptionresnetv2.py
@@ -2,46 +2,50 @@
 import torch
 import torch.nn as nn
 import torch.utils.model_zoo as model_zoo
-import os
-import sys
 
-__all__ = ['InceptionResNetV2', 'inceptionresnetv2']
+__all__ = ["InceptionResNetV2", "inceptionresnetv2"]
 
 pretrained_settings = {
-    'inceptionresnetv2': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 299, 299],
-            'input_range': [0, 1],
-            'mean': [0.5, 0.5, 0.5],
-            'std': [0.5, 0.5, 0.5],
-            'num_classes': 1000
+    "inceptionresnetv2": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth",
+            "input_space": "RGB",
+            "input_size": [3, 299, 299],
+            "input_range": [0, 1],
+            "mean": [0.5, 0.5, 0.5],
+            "std": [0.5, 0.5, 0.5],
+            "num_classes": 1000,
+        },
+        "imagenet+background": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth",
+            "input_space": "RGB",
+            "input_size": [3, 299, 299],
+            "input_range": [0, 1],
+            "mean": [0.5, 0.5, 0.5],
+            "std": [0.5, 0.5, 0.5],
+            "num_classes": 1001,
         },
-        'imagenet+background': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 299, 299],
-            'input_range': [0, 1],
-            'mean': [0.5, 0.5, 0.5],
-            'std': [0.5, 0.5, 0.5],
-            'num_classes': 1001
-        }
     }
 }
 
 
 class BasicConv2d(nn.Module):
-
     def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
         super(BasicConv2d, self).__init__()
-        self.conv = nn.Conv2d(in_planes, out_planes,
-                              kernel_size=kernel_size, stride=stride,
-                              padding=padding, bias=False) # verify bias false
-        self.bn = nn.BatchNorm2d(out_planes,
-                                 eps=0.001, # value found in tensorflow
-                                 momentum=0.1, # default pytorch value
-                                 affine=True)
+        self.conv = nn.Conv2d(
+            in_planes,
+            out_planes,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            bias=False,
+        )  # verify bias false
+        self.bn = nn.BatchNorm2d(
+            out_planes,
+            eps=0.001,  # value found in tensorflow
+            momentum=0.1,  # default pytorch value
+            affine=True,
+        )
         self.relu = nn.ReLU(inplace=False)
 
     def forward(self, x):
@@ -52,7 +56,6 @@ def forward(self, x):
 
 
 class Mixed_5b(nn.Module):
-
     def __init__(self):
         super(Mixed_5b, self).__init__()
 
@@ -60,18 +63,18 @@ def __init__(self):
 
         self.branch1 = nn.Sequential(
             BasicConv2d(192, 48, kernel_size=1, stride=1),
-            BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2)
+            BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2),
         )
 
         self.branch2 = nn.Sequential(
             BasicConv2d(192, 64, kernel_size=1, stride=1),
             BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
-            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
+            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1),
         )
 
         self.branch3 = nn.Sequential(
             nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
-            BasicConv2d(192, 64, kernel_size=1, stride=1)
+            BasicConv2d(192, 64, kernel_size=1, stride=1),
         )
 
     def forward(self, x):
@@ -84,7 +87,6 @@ def forward(self, x):
 
 
 class Block35(nn.Module):
-
     def __init__(self, scale=1.0):
         super(Block35, self).__init__()
 
@@ -94,13 +96,13 @@ def __init__(self, scale=1.0):
 
         self.branch1 = nn.Sequential(
             BasicConv2d(320, 32, kernel_size=1, stride=1),
-            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
+            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
         )
 
         self.branch2 = nn.Sequential(
             BasicConv2d(320, 32, kernel_size=1, stride=1),
             BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1),
-            BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1)
+            BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1),
         )
 
         self.conv2d = nn.Conv2d(128, 320, kernel_size=1, stride=1)
@@ -118,7 +120,6 @@ def forward(self, x):
 
 
 class Mixed_6a(nn.Module):
-
     def __init__(self):
         super(Mixed_6a, self).__init__()
 
@@ -127,7 +128,7 @@ def __init__(self):
         self.branch1 = nn.Sequential(
             BasicConv2d(320, 256, kernel_size=1, stride=1),
             BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
-            BasicConv2d(256, 384, kernel_size=3, stride=2)
+            BasicConv2d(256, 384, kernel_size=3, stride=2),
         )
 
         self.branch2 = nn.MaxPool2d(3, stride=2)
@@ -141,7 +142,6 @@ def forward(self, x):
 
 
 class Block17(nn.Module):
-
     def __init__(self, scale=1.0):
         super(Block17, self).__init__()
 
@@ -151,8 +151,8 @@ def __init__(self, scale=1.0):
 
         self.branch1 = nn.Sequential(
             BasicConv2d(1088, 128, kernel_size=1, stride=1),
-            BasicConv2d(128, 160, kernel_size=(1,7), stride=1, padding=(0,3)),
-            BasicConv2d(160, 192, kernel_size=(7,1), stride=1, padding=(3,0))
+            BasicConv2d(128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)),
+            BasicConv2d(160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)),
         )
 
         self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1)
@@ -169,24 +169,23 @@ def forward(self, x):
 
 
 class Mixed_7a(nn.Module):
-
     def __init__(self):
         super(Mixed_7a, self).__init__()
 
         self.branch0 = nn.Sequential(
             BasicConv2d(1088, 256, kernel_size=1, stride=1),
-            BasicConv2d(256, 384, kernel_size=3, stride=2)
+            BasicConv2d(256, 384, kernel_size=3, stride=2),
         )
 
         self.branch1 = nn.Sequential(
             BasicConv2d(1088, 256, kernel_size=1, stride=1),
-            BasicConv2d(256, 288, kernel_size=3, stride=2)
+            BasicConv2d(256, 288, kernel_size=3, stride=2),
         )
 
         self.branch2 = nn.Sequential(
             BasicConv2d(1088, 256, kernel_size=1, stride=1),
             BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1),
-            BasicConv2d(288, 320, kernel_size=3, stride=2)
+            BasicConv2d(288, 320, kernel_size=3, stride=2),
         )
 
         self.branch3 = nn.MaxPool2d(3, stride=2)
@@ -201,7 +200,6 @@ def forward(self, x):
 
 
 class Block8(nn.Module):
-
     def __init__(self, scale=1.0, noReLU=False):
         super(Block8, self).__init__()
 
@@ -212,8 +210,8 @@ def __init__(self, scale=1.0, noReLU=False):
 
         self.branch1 = nn.Sequential(
             BasicConv2d(2080, 192, kernel_size=1, stride=1),
-            BasicConv2d(192, 224, kernel_size=(1,3), stride=1, padding=(0,1)),
-            BasicConv2d(224, 256, kernel_size=(3,1), stride=1, padding=(1,0))
+            BasicConv2d(192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)),
+            BasicConv2d(224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)),
         )
 
         self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1)
@@ -232,7 +230,6 @@ def forward(self, x):
 
 
 class InceptionResNetV2(nn.Module):
-
     def __init__(self, num_classes=1001):
         super(InceptionResNetV2, self).__init__()
         # Special attributs
@@ -259,7 +256,7 @@ def __init__(self, num_classes=1001):
             Block35(scale=0.17),
             Block35(scale=0.17),
             Block35(scale=0.17),
-            Block35(scale=0.17)
+            Block35(scale=0.17),
         )
         self.mixed_6a = Mixed_6a()
         self.repeat_1 = nn.Sequential(
@@ -282,7 +279,7 @@ def __init__(self, num_classes=1001):
             Block17(scale=0.10),
             Block17(scale=0.10),
             Block17(scale=0.10),
-            Block17(scale=0.10)
+            Block17(scale=0.10),
         )
         self.mixed_7a = Mixed_7a()
         self.repeat_2 = nn.Sequential(
@@ -294,7 +291,7 @@ def __init__(self, num_classes=1001):
             Block8(scale=0.20),
             Block8(scale=0.20),
             Block8(scale=0.20),
-            Block8(scale=0.20)
+            Block8(scale=0.20),
         )
         self.block8 = Block8(noReLU=True)
         self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)
@@ -330,51 +327,55 @@ def forward(self, input):
         x = self.logits(x)
         return x
 
-def inceptionresnetv2(num_classes=1000, pretrained='imagenet'):
+
+def inceptionresnetv2(num_classes=1000, pretrained="imagenet"):
     r"""InceptionResNetV2 model architecture from the
     `"InceptionV4, Inception-ResNet..." <https://arxiv.org/abs/1602.07261>`_ paper.
     """
     if pretrained:
-        settings = pretrained_settings['inceptionresnetv2'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
+        settings = pretrained_settings["inceptionresnetv2"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
 
         # both 'imagenet'&'imagenet+background' are loaded from same parameters
         model = InceptionResNetV2(num_classes=1001)
-        model.load_state_dict(model_zoo.load_url(settings['url']))
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
 
-        if pretrained == 'imagenet':
+        if pretrained == "imagenet":
             new_last_linear = nn.Linear(1536, 1000)
             new_last_linear.weight.data = model.last_linear.weight.data[1:]
             new_last_linear.bias.data = model.last_linear.bias.data[1:]
             model.last_linear = new_last_linear
 
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
 
-        model.mean = settings['mean']
-        model.std = settings['std']
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     else:
         model = InceptionResNetV2(num_classes=num_classes)
     return model
 
-'''
+
+"""
 TEST
 Run this code with:
 ```
 cd $HOME/pretrained-models.pytorch
 python -m pretrainedmodels.inceptionresnetv2
 ```
-'''
-if __name__ == '__main__':
-
+"""
+if __name__ == "__main__":
     assert inceptionresnetv2(num_classes=10, pretrained=None)
-    print('success')
-    assert inceptionresnetv2(num_classes=1000, pretrained='imagenet')
-    print('success')
-    assert inceptionresnetv2(num_classes=1001, pretrained='imagenet+background')
-    print('success')
+    print("success")
+    assert inceptionresnetv2(num_classes=1000, pretrained="imagenet")
+    print("success")
+    assert inceptionresnetv2(num_classes=1001, pretrained="imagenet+background")
+    print("success")
 
     # fail
-    assert inceptionresnetv2(num_classes=1001, pretrained='imagenet')
\ No newline at end of file
+    assert inceptionresnetv2(num_classes=1001, pretrained="imagenet")
diff --git a/segmentation_models_pytorch/encoders/_inceptionv4.py b/segmentation_models_pytorch/encoders/_inceptionv4.py
index d48f7b77..14b8eadb 100644
--- a/segmentation_models_pytorch/encoders/_inceptionv4.py
+++ b/segmentation_models_pytorch/encoders/_inceptionv4.py
@@ -3,46 +3,50 @@
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.utils.model_zoo as model_zoo
-import os
-import sys
 
-__all__ = ['InceptionV4', 'inceptionv4']
+__all__ = ["InceptionV4", "inceptionv4"]
 
 pretrained_settings = {
-    'inceptionv4': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 299, 299],
-            'input_range': [0, 1],
-            'mean': [0.5, 0.5, 0.5],
-            'std': [0.5, 0.5, 0.5],
-            'num_classes': 1000
+    "inceptionv4": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth",
+            "input_space": "RGB",
+            "input_size": [3, 299, 299],
+            "input_range": [0, 1],
+            "mean": [0.5, 0.5, 0.5],
+            "std": [0.5, 0.5, 0.5],
+            "num_classes": 1000,
+        },
+        "imagenet+background": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth",
+            "input_space": "RGB",
+            "input_size": [3, 299, 299],
+            "input_range": [0, 1],
+            "mean": [0.5, 0.5, 0.5],
+            "std": [0.5, 0.5, 0.5],
+            "num_classes": 1001,
         },
-        'imagenet+background': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 299, 299],
-            'input_range': [0, 1],
-            'mean': [0.5, 0.5, 0.5],
-            'std': [0.5, 0.5, 0.5],
-            'num_classes': 1001
-        }
     }
 }
 
 
 class BasicConv2d(nn.Module):
-
     def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
         super(BasicConv2d, self).__init__()
-        self.conv = nn.Conv2d(in_planes, out_planes,
-                              kernel_size=kernel_size, stride=stride,
-                              padding=padding, bias=False) # verify bias false
-        self.bn = nn.BatchNorm2d(out_planes,
-                                 eps=0.001, # value found in tensorflow
-                                 momentum=0.1, # default pytorch value
-                                 affine=True)
+        self.conv = nn.Conv2d(
+            in_planes,
+            out_planes,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            bias=False,
+        )  # verify bias false
+        self.bn = nn.BatchNorm2d(
+            out_planes,
+            eps=0.001,  # value found in tensorflow
+            momentum=0.1,  # default pytorch value
+            affine=True,
+        )
         self.relu = nn.ReLU(inplace=True)
 
     def forward(self, x):
@@ -53,7 +57,6 @@ def forward(self, x):
 
 
 class Mixed_3a(nn.Module):
-
     def __init__(self):
         super(Mixed_3a, self).__init__()
         self.maxpool = nn.MaxPool2d(3, stride=2)
@@ -67,20 +70,19 @@ def forward(self, x):
 
 
 class Mixed_4a(nn.Module):
-
     def __init__(self):
         super(Mixed_4a, self).__init__()
 
         self.branch0 = nn.Sequential(
             BasicConv2d(160, 64, kernel_size=1, stride=1),
-            BasicConv2d(64, 96, kernel_size=3, stride=1)
+            BasicConv2d(64, 96, kernel_size=3, stride=1),
         )
 
         self.branch1 = nn.Sequential(
             BasicConv2d(160, 64, kernel_size=1, stride=1),
-            BasicConv2d(64, 64, kernel_size=(1,7), stride=1, padding=(0,3)),
-            BasicConv2d(64, 64, kernel_size=(7,1), stride=1, padding=(3,0)),
-            BasicConv2d(64, 96, kernel_size=(3,3), stride=1)
+            BasicConv2d(64, 64, kernel_size=(1, 7), stride=1, padding=(0, 3)),
+            BasicConv2d(64, 64, kernel_size=(7, 1), stride=1, padding=(3, 0)),
+            BasicConv2d(64, 96, kernel_size=(3, 3), stride=1),
         )
 
     def forward(self, x):
@@ -91,7 +93,6 @@ def forward(self, x):
 
 
 class Mixed_5a(nn.Module):
-
     def __init__(self):
         super(Mixed_5a, self).__init__()
         self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2)
@@ -105,25 +106,24 @@ def forward(self, x):
 
 
 class Inception_A(nn.Module):
-
     def __init__(self):
         super(Inception_A, self).__init__()
         self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1)
 
         self.branch1 = nn.Sequential(
             BasicConv2d(384, 64, kernel_size=1, stride=1),
-            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1)
+            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
         )
 
         self.branch2 = nn.Sequential(
             BasicConv2d(384, 64, kernel_size=1, stride=1),
             BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
-            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
+            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1),
         )
 
         self.branch3 = nn.Sequential(
             nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
-            BasicConv2d(384, 96, kernel_size=1, stride=1)
+            BasicConv2d(384, 96, kernel_size=1, stride=1),
         )
 
     def forward(self, x):
@@ -136,7 +136,6 @@ def forward(self, x):
 
 
 class Reduction_A(nn.Module):
-
     def __init__(self):
         super(Reduction_A, self).__init__()
         self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2)
@@ -144,7 +143,7 @@ def __init__(self):
         self.branch1 = nn.Sequential(
             BasicConv2d(384, 192, kernel_size=1, stride=1),
             BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1),
-            BasicConv2d(224, 256, kernel_size=3, stride=2)
+            BasicConv2d(224, 256, kernel_size=3, stride=2),
         )
 
         self.branch2 = nn.MaxPool2d(3, stride=2)
@@ -158,28 +157,27 @@ def forward(self, x):
 
 
 class Inception_B(nn.Module):
-
     def __init__(self):
         super(Inception_B, self).__init__()
         self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1)
 
         self.branch1 = nn.Sequential(
             BasicConv2d(1024, 192, kernel_size=1, stride=1),
-            BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)),
-            BasicConv2d(224, 256, kernel_size=(7,1), stride=1, padding=(3,0))
+            BasicConv2d(192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)),
+            BasicConv2d(224, 256, kernel_size=(7, 1), stride=1, padding=(3, 0)),
         )
 
         self.branch2 = nn.Sequential(
             BasicConv2d(1024, 192, kernel_size=1, stride=1),
-            BasicConv2d(192, 192, kernel_size=(7,1), stride=1, padding=(3,0)),
-            BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)),
-            BasicConv2d(224, 224, kernel_size=(7,1), stride=1, padding=(3,0)),
-            BasicConv2d(224, 256, kernel_size=(1,7), stride=1, padding=(0,3))
+            BasicConv2d(192, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)),
+            BasicConv2d(192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)),
+            BasicConv2d(224, 224, kernel_size=(7, 1), stride=1, padding=(3, 0)),
+            BasicConv2d(224, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)),
         )
 
         self.branch3 = nn.Sequential(
             nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
-            BasicConv2d(1024, 128, kernel_size=1, stride=1)
+            BasicConv2d(1024, 128, kernel_size=1, stride=1),
         )
 
     def forward(self, x):
@@ -192,20 +190,19 @@ def forward(self, x):
 
 
 class Reduction_B(nn.Module):
-
     def __init__(self):
         super(Reduction_B, self).__init__()
 
         self.branch0 = nn.Sequential(
             BasicConv2d(1024, 192, kernel_size=1, stride=1),
-            BasicConv2d(192, 192, kernel_size=3, stride=2)
+            BasicConv2d(192, 192, kernel_size=3, stride=2),
         )
 
         self.branch1 = nn.Sequential(
             BasicConv2d(1024, 256, kernel_size=1, stride=1),
-            BasicConv2d(256, 256, kernel_size=(1,7), stride=1, padding=(0,3)),
-            BasicConv2d(256, 320, kernel_size=(7,1), stride=1, padding=(3,0)),
-            BasicConv2d(320, 320, kernel_size=3, stride=2)
+            BasicConv2d(256, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)),
+            BasicConv2d(256, 320, kernel_size=(7, 1), stride=1, padding=(3, 0)),
+            BasicConv2d(320, 320, kernel_size=3, stride=2),
         )
 
         self.branch2 = nn.MaxPool2d(3, stride=2)
@@ -219,25 +216,36 @@ def forward(self, x):
 
 
 class Inception_C(nn.Module):
-
     def __init__(self):
         super(Inception_C, self).__init__()
 
         self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1)
 
         self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
-        self.branch1_1a = BasicConv2d(384, 256, kernel_size=(1,3), stride=1, padding=(0,1))
-        self.branch1_1b = BasicConv2d(384, 256, kernel_size=(3,1), stride=1, padding=(1,0))
+        self.branch1_1a = BasicConv2d(
+            384, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)
+        )
+        self.branch1_1b = BasicConv2d(
+            384, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
+        )
 
         self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
-        self.branch2_1 = BasicConv2d(384, 448, kernel_size=(3,1), stride=1, padding=(1,0))
-        self.branch2_2 = BasicConv2d(448, 512, kernel_size=(1,3), stride=1, padding=(0,1))
-        self.branch2_3a = BasicConv2d(512, 256, kernel_size=(1,3), stride=1, padding=(0,1))
-        self.branch2_3b = BasicConv2d(512, 256, kernel_size=(3,1), stride=1, padding=(1,0))
+        self.branch2_1 = BasicConv2d(
+            384, 448, kernel_size=(3, 1), stride=1, padding=(1, 0)
+        )
+        self.branch2_2 = BasicConv2d(
+            448, 512, kernel_size=(1, 3), stride=1, padding=(0, 1)
+        )
+        self.branch2_3a = BasicConv2d(
+            512, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)
+        )
+        self.branch2_3b = BasicConv2d(
+            512, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
+        )
 
         self.branch3 = nn.Sequential(
             nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
-            BasicConv2d(1536, 256, kernel_size=1, stride=1)
+            BasicConv2d(1536, 256, kernel_size=1, stride=1),
         )
 
     def forward(self, x):
@@ -262,7 +270,6 @@ def forward(self, x):
 
 
 class InceptionV4(nn.Module):
-
     def __init__(self, num_classes=1001):
         super(InceptionV4, self).__init__()
         # Special attributs
@@ -282,7 +289,7 @@ def __init__(self, num_classes=1001):
             Inception_A(),
             Inception_A(),
             Inception_A(),
-            Reduction_A(), # Mixed_6a
+            Reduction_A(),  # Mixed_6a
             Inception_B(),
             Inception_B(),
             Inception_B(),
@@ -290,15 +297,15 @@ def __init__(self, num_classes=1001):
             Inception_B(),
             Inception_B(),
             Inception_B(),
-            Reduction_B(), # Mixed_7a
+            Reduction_B(),  # Mixed_7a
+            Inception_C(),
             Inception_C(),
             Inception_C(),
-            Inception_C()
         )
         self.last_linear = nn.Linear(1536, num_classes)
 
     def logits(self, features):
-        #Allows image of any size to be processed
+        # Allows image of any size to be processed
         adaptiveAvgPoolWidth = features.shape[2]
         x = F.avg_pool2d(features, kernel_size=adaptiveAvgPoolWidth)
         x = x.view(x.size(0), -1)
@@ -311,48 +318,50 @@ def forward(self, input):
         return x
 
 
-def inceptionv4(num_classes=1000, pretrained='imagenet'):
+def inceptionv4(num_classes=1000, pretrained="imagenet"):
     if pretrained:
-        settings = pretrained_settings['inceptionv4'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
+        settings = pretrained_settings["inceptionv4"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
 
         # both 'imagenet'&'imagenet+background' are loaded from same parameters
         model = InceptionV4(num_classes=1001)
-        model.load_state_dict(model_zoo.load_url(settings['url']))
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
 
-        if pretrained == 'imagenet':
+        if pretrained == "imagenet":
             new_last_linear = nn.Linear(1536, 1000)
             new_last_linear.weight.data = model.last_linear.weight.data[1:]
             new_last_linear.bias.data = model.last_linear.bias.data[1:]
             model.last_linear = new_last_linear
 
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
     else:
         model = InceptionV4(num_classes=num_classes)
     return model
 
 
-'''
+"""
 TEST
 Run this code with:
 ```
 cd $HOME/pretrained-models.pytorch
 python -m pretrainedmodels.inceptionv4
 ```
-'''
-if __name__ == '__main__':
-
+"""
+if __name__ == "__main__":
     assert inceptionv4(num_classes=10, pretrained=None)
-    print('success')
-    assert inceptionv4(num_classes=1000, pretrained='imagenet')
-    print('success')
-    assert inceptionv4(num_classes=1001, pretrained='imagenet+background')
-    print('success')
+    print("success")
+    assert inceptionv4(num_classes=1000, pretrained="imagenet")
+    print("success")
+    assert inceptionv4(num_classes=1001, pretrained="imagenet+background")
+    print("success")
 
     # fail
-    assert inceptionv4(num_classes=1001, pretrained='imagenet')
+    assert inceptionv4(num_classes=1001, pretrained="imagenet")
diff --git a/segmentation_models_pytorch/encoders/_senet.py b/segmentation_models_pytorch/encoders/_senet.py
index 20bd122f..1e555ca1 100644
--- a/segmentation_models_pytorch/encoders/_senet.py
+++ b/segmentation_models_pytorch/encoders/_senet.py
@@ -2,6 +2,7 @@
 ResNet code gently borrowed from
 https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
 """
+
 from __future__ import print_function, division, absolute_import
 from collections import OrderedDict
 import math
@@ -9,89 +10,93 @@
 import torch.nn as nn
 from torch.utils import model_zoo
 
-__all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152',
-           'se_resnext50_32x4d', 'se_resnext101_32x4d']
+__all__ = [
+    "SENet",
+    "senet154",
+    "se_resnet50",
+    "se_resnet101",
+    "se_resnet152",
+    "se_resnext50_32x4d",
+    "se_resnext101_32x4d",
+]
 
 pretrained_settings = {
-    'senet154': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [0.485, 0.456, 0.406],
-            'std': [0.229, 0.224, 0.225],
-            'num_classes': 1000
+    "senet154": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "num_classes": 1000,
         }
     },
-    'se_resnet50': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [0.485, 0.456, 0.406],
-            'std': [0.229, 0.224, 0.225],
-            'num_classes': 1000
+    "se_resnet50": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "num_classes": 1000,
         }
     },
-    'se_resnet101': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [0.485, 0.456, 0.406],
-            'std': [0.229, 0.224, 0.225],
-            'num_classes': 1000
+    "se_resnet101": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "num_classes": 1000,
         }
     },
-    'se_resnet152': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [0.485, 0.456, 0.406],
-            'std': [0.229, 0.224, 0.225],
-            'num_classes': 1000
+    "se_resnet152": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "num_classes": 1000,
         }
     },
-    'se_resnext50_32x4d': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [0.485, 0.456, 0.406],
-            'std': [0.229, 0.224, 0.225],
-            'num_classes': 1000
+    "se_resnext50_32x4d": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "num_classes": 1000,
         }
     },
-    'se_resnext101_32x4d': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 224, 224],
-            'input_range': [0, 1],
-            'mean': [0.485, 0.456, 0.406],
-            'std': [0.229, 0.224, 0.225],
-            'num_classes': 1000
+    "se_resnext101_32x4d": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth",
+            "input_space": "RGB",
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "num_classes": 1000,
         }
     },
 }
 
 
 class SEModule(nn.Module):
-
     def __init__(self, channels, reduction):
         super(SEModule, self).__init__()
         self.avg_pool = nn.AdaptiveAvgPool2d(1)
-        self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
-                             padding=0)
+        self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0)
         self.relu = nn.ReLU(inplace=True)
-        self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
-                             padding=0)
+        self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0)
         self.sigmoid = nn.Sigmoid()
 
     def forward(self, x):
@@ -108,6 +113,7 @@ class Bottleneck(nn.Module):
     """
     Base class for bottlenecks that implements `forward()` method.
     """
+
     def forward(self, x):
         residual = x
 
@@ -135,19 +141,24 @@ class SEBottleneck(Bottleneck):
     """
     Bottleneck for SENet154.
     """
+
     expansion = 4
 
-    def __init__(self, inplanes, planes, groups, reduction, stride=1,
-                 downsample=None):
+    def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None):
         super(SEBottleneck, self).__init__()
         self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
         self.bn1 = nn.BatchNorm2d(planes * 2)
-        self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3,
-                               stride=stride, padding=1, groups=groups,
-                               bias=False)
+        self.conv2 = nn.Conv2d(
+            planes * 2,
+            planes * 4,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=groups,
+            bias=False,
+        )
         self.bn2 = nn.BatchNorm2d(planes * 4)
-        self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1,
-                               bias=False)
+        self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, bias=False)
         self.bn3 = nn.BatchNorm2d(planes * 4)
         self.relu = nn.ReLU(inplace=True)
         self.se_module = SEModule(planes * 4, reduction=reduction)
@@ -161,16 +172,18 @@ class SEResNetBottleneck(Bottleneck):
     implementation and uses `stride=stride` in `conv1` and not in `conv2`
     (the latter is used in the torchvision implementation of ResNet).
     """
+
     expansion = 4
 
-    def __init__(self, inplanes, planes, groups, reduction, stride=1,
-                 downsample=None):
+    def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None):
         super(SEResNetBottleneck, self).__init__()
-        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
-                               stride=stride)
+        self.conv1 = nn.Conv2d(
+            inplanes, planes, kernel_size=1, bias=False, stride=stride
+        )
         self.bn1 = nn.BatchNorm2d(planes)
-        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
-                               groups=groups, bias=False)
+        self.conv2 = nn.Conv2d(
+            planes, planes, kernel_size=3, padding=1, groups=groups, bias=False
+        )
         self.bn2 = nn.BatchNorm2d(planes)
         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
         self.bn3 = nn.BatchNorm2d(planes * 4)
@@ -184,17 +197,32 @@ class SEResNeXtBottleneck(Bottleneck):
     """
     ResNeXt bottleneck type C with a Squeeze-and-Excitation module.
     """
+
     expansion = 4
 
-    def __init__(self, inplanes, planes, groups, reduction, stride=1,
-                 downsample=None, base_width=4):
+    def __init__(
+        self,
+        inplanes,
+        planes,
+        groups,
+        reduction,
+        stride=1,
+        downsample=None,
+        base_width=4,
+    ):
         super(SEResNeXtBottleneck, self).__init__()
         width = math.floor(planes * (base_width / 64)) * groups
-        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,
-                               stride=1)
+        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, stride=1)
         self.bn1 = nn.BatchNorm2d(width)
-        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
-                               padding=1, groups=groups, bias=False)
+        self.conv2 = nn.Conv2d(
+            width,
+            width,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=groups,
+            bias=False,
+        )
         self.bn2 = nn.BatchNorm2d(width)
         self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
         self.bn3 = nn.BatchNorm2d(planes * 4)
@@ -205,10 +233,19 @@ def __init__(self, inplanes, planes, groups, reduction, stride=1,
 
 
 class SENet(nn.Module):
-
-    def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
-                 inplanes=128, input_3x3=True, downsample_kernel_size=3,
-                 downsample_padding=1, num_classes=1000):
+    def __init__(
+        self,
+        block,
+        layers,
+        groups,
+        reduction,
+        dropout_p=0.2,
+        inplanes=128,
+        input_3x3=True,
+        downsample_kernel_size=3,
+        downsample_padding=1,
+        num_classes=1000,
+    ):
         """
         Parameters
         ----------
@@ -256,30 +293,30 @@ def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
         self.inplanes = inplanes
         if input_3x3:
             layer0_modules = [
-                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
-                                    bias=False)),
-                ('bn1', nn.BatchNorm2d(64)),
-                ('relu1', nn.ReLU(inplace=True)),
-                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
-                                    bias=False)),
-                ('bn2', nn.BatchNorm2d(64)),
-                ('relu2', nn.ReLU(inplace=True)),
-                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
-                                    bias=False)),
-                ('bn3', nn.BatchNorm2d(inplanes)),
-                ('relu3', nn.ReLU(inplace=True)),
+                ("conv1", nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
+                ("bn1", nn.BatchNorm2d(64)),
+                ("relu1", nn.ReLU(inplace=True)),
+                ("conv2", nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
+                ("bn2", nn.BatchNorm2d(64)),
+                ("relu2", nn.ReLU(inplace=True)),
+                ("conv3", nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)),
+                ("bn3", nn.BatchNorm2d(inplanes)),
+                ("relu3", nn.ReLU(inplace=True)),
             ]
         else:
             layer0_modules = [
-                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
-                                    padding=3, bias=False)),
-                ('bn1', nn.BatchNorm2d(inplanes)),
-                ('relu1', nn.ReLU(inplace=True)),
+                (
+                    "conv1",
+                    nn.Conv2d(
+                        3, inplanes, kernel_size=7, stride=2, padding=3, bias=False
+                    ),
+                ),
+                ("bn1", nn.BatchNorm2d(inplanes)),
+                ("relu1", nn.ReLU(inplace=True)),
             ]
         # To preserve compatibility with Caffe weights `ceil_mode=True`
         # is used instead of `padding=1`.
-        layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
-                                                    ceil_mode=True)))
+        layer0_modules.append(("pool", nn.MaxPool2d(3, stride=2, ceil_mode=True)))
         self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
         self.layer1 = self._make_layer(
             block,
@@ -288,7 +325,7 @@ def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
             groups=groups,
             reduction=reduction,
             downsample_kernel_size=1,
-            downsample_padding=0
+            downsample_padding=0,
         )
         self.layer2 = self._make_layer(
             block,
@@ -298,7 +335,7 @@ def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
             groups=groups,
             reduction=reduction,
             downsample_kernel_size=downsample_kernel_size,
-            downsample_padding=downsample_padding
+            downsample_padding=downsample_padding,
         )
         self.layer3 = self._make_layer(
             block,
@@ -308,7 +345,7 @@ def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
             groups=groups,
             reduction=reduction,
             downsample_kernel_size=downsample_kernel_size,
-            downsample_padding=downsample_padding
+            downsample_padding=downsample_padding,
         )
         self.layer4 = self._make_layer(
             block,
@@ -318,26 +355,41 @@ def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
             groups=groups,
             reduction=reduction,
             downsample_kernel_size=downsample_kernel_size,
-            downsample_padding=downsample_padding
+            downsample_padding=downsample_padding,
         )
         self.avg_pool = nn.AvgPool2d(7, stride=1)
         self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
         self.last_linear = nn.Linear(512 * block.expansion, num_classes)
 
-    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
-                    downsample_kernel_size=1, downsample_padding=0):
+    def _make_layer(
+        self,
+        block,
+        planes,
+        blocks,
+        groups,
+        reduction,
+        stride=1,
+        downsample_kernel_size=1,
+        downsample_padding=0,
+    ):
         downsample = None
         if stride != 1 or self.inplanes != planes * block.expansion:
             downsample = nn.Sequential(
-                nn.Conv2d(self.inplanes, planes * block.expansion,
-                          kernel_size=downsample_kernel_size, stride=stride,
-                          padding=downsample_padding, bias=False),
+                nn.Conv2d(
+                    self.inplanes,
+                    planes * block.expansion,
+                    kernel_size=downsample_kernel_size,
+                    stride=stride,
+                    padding=downsample_padding,
+                    bias=False,
+                ),
                 nn.BatchNorm2d(planes * block.expansion),
             )
 
         layers = []
-        layers.append(block(self.inplanes, planes, groups, reduction, stride,
-                            downsample))
+        layers.append(
+            block(self.inplanes, planes, groups, reduction, stride, downsample)
+        )
         self.inplanes = planes * block.expansion
         for i in range(1, blocks):
             layers.append(block(self.inplanes, planes, groups, reduction))
@@ -367,76 +419,124 @@ def forward(self, x):
 
 
 def initialize_pretrained_model(model, num_classes, settings):
-    assert num_classes == settings['num_classes'], \
-        'num_classes should be {}, but is {}'.format(
-            settings['num_classes'], num_classes)
-    model.load_state_dict(model_zoo.load_url(settings['url']))
-    model.input_space = settings['input_space']
-    model.input_size = settings['input_size']
-    model.input_range = settings['input_range']
-    model.mean = settings['mean']
-    model.std = settings['std']
-
-
-def senet154(num_classes=1000, pretrained='imagenet'):
-    model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16,
-                  dropout_p=0.2, num_classes=num_classes)
+    assert num_classes == settings["num_classes"], (
+        "num_classes should be {}, but is {}".format(
+            settings["num_classes"], num_classes
+        )
+    )
+    model.load_state_dict(model_zoo.load_url(settings["url"]))
+    model.input_space = settings["input_space"]
+    model.input_size = settings["input_size"]
+    model.input_range = settings["input_range"]
+    model.mean = settings["mean"]
+    model.std = settings["std"]
+
+
+def senet154(num_classes=1000, pretrained="imagenet"):
+    model = SENet(
+        SEBottleneck,
+        [3, 8, 36, 3],
+        groups=64,
+        reduction=16,
+        dropout_p=0.2,
+        num_classes=num_classes,
+    )
     if pretrained is not None:
-        settings = pretrained_settings['senet154'][pretrained]
+        settings = pretrained_settings["senet154"][pretrained]
         initialize_pretrained_model(model, num_classes, settings)
     return model
 
 
-def se_resnet50(num_classes=1000, pretrained='imagenet'):
-    model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16,
-                  dropout_p=None, inplanes=64, input_3x3=False,
-                  downsample_kernel_size=1, downsample_padding=0,
-                  num_classes=num_classes)
+def se_resnet50(num_classes=1000, pretrained="imagenet"):
+    model = SENet(
+        SEResNetBottleneck,
+        [3, 4, 6, 3],
+        groups=1,
+        reduction=16,
+        dropout_p=None,
+        inplanes=64,
+        input_3x3=False,
+        downsample_kernel_size=1,
+        downsample_padding=0,
+        num_classes=num_classes,
+    )
     if pretrained is not None:
-        settings = pretrained_settings['se_resnet50'][pretrained]
+        settings = pretrained_settings["se_resnet50"][pretrained]
         initialize_pretrained_model(model, num_classes, settings)
     return model
 
 
-def se_resnet101(num_classes=1000, pretrained='imagenet'):
-    model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16,
-                  dropout_p=None, inplanes=64, input_3x3=False,
-                  downsample_kernel_size=1, downsample_padding=0,
-                  num_classes=num_classes)
+def se_resnet101(num_classes=1000, pretrained="imagenet"):
+    model = SENet(
+        SEResNetBottleneck,
+        [3, 4, 23, 3],
+        groups=1,
+        reduction=16,
+        dropout_p=None,
+        inplanes=64,
+        input_3x3=False,
+        downsample_kernel_size=1,
+        downsample_padding=0,
+        num_classes=num_classes,
+    )
     if pretrained is not None:
-        settings = pretrained_settings['se_resnet101'][pretrained]
+        settings = pretrained_settings["se_resnet101"][pretrained]
         initialize_pretrained_model(model, num_classes, settings)
     return model
 
 
-def se_resnet152(num_classes=1000, pretrained='imagenet'):
-    model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16,
-                  dropout_p=None, inplanes=64, input_3x3=False,
-                  downsample_kernel_size=1, downsample_padding=0,
-                  num_classes=num_classes)
+def se_resnet152(num_classes=1000, pretrained="imagenet"):
+    model = SENet(
+        SEResNetBottleneck,
+        [3, 8, 36, 3],
+        groups=1,
+        reduction=16,
+        dropout_p=None,
+        inplanes=64,
+        input_3x3=False,
+        downsample_kernel_size=1,
+        downsample_padding=0,
+        num_classes=num_classes,
+    )
     if pretrained is not None:
-        settings = pretrained_settings['se_resnet152'][pretrained]
+        settings = pretrained_settings["se_resnet152"][pretrained]
         initialize_pretrained_model(model, num_classes, settings)
     return model
 
 
-def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'):
-    model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16,
-                  dropout_p=None, inplanes=64, input_3x3=False,
-                  downsample_kernel_size=1, downsample_padding=0,
-                  num_classes=num_classes)
+def se_resnext50_32x4d(num_classes=1000, pretrained="imagenet"):
+    model = SENet(
+        SEResNeXtBottleneck,
+        [3, 4, 6, 3],
+        groups=32,
+        reduction=16,
+        dropout_p=None,
+        inplanes=64,
+        input_3x3=False,
+        downsample_kernel_size=1,
+        downsample_padding=0,
+        num_classes=num_classes,
+    )
     if pretrained is not None:
-        settings = pretrained_settings['se_resnext50_32x4d'][pretrained]
+        settings = pretrained_settings["se_resnext50_32x4d"][pretrained]
         initialize_pretrained_model(model, num_classes, settings)
     return model
 
 
-def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet'):
-    model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16,
-                  dropout_p=None, inplanes=64, input_3x3=False,
-                  downsample_kernel_size=1, downsample_padding=0,
-                  num_classes=num_classes)
+def se_resnext101_32x4d(num_classes=1000, pretrained="imagenet"):
+    model = SENet(
+        SEResNeXtBottleneck,
+        [3, 4, 23, 3],
+        groups=32,
+        reduction=16,
+        dropout_p=None,
+        inplanes=64,
+        input_3x3=False,
+        downsample_kernel_size=1,
+        downsample_padding=0,
+        num_classes=num_classes,
+    )
     if pretrained is not None:
-        settings = pretrained_settings['se_resnext101_32x4d'][pretrained]
+        settings = pretrained_settings["se_resnext101_32x4d"][pretrained]
         initialize_pretrained_model(model, num_classes, settings)
     return model
diff --git a/segmentation_models_pytorch/encoders/_xception.py b/segmentation_models_pytorch/encoders/_xception.py
index 7783c477..5c4420f6 100644
--- a/segmentation_models_pytorch/encoders/_xception.py
+++ b/segmentation_models_pytorch/encoders/_xception.py
@@ -21,72 +21,108 @@
 
 The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299
 """
+
 from __future__ import print_function, division, absolute_import
-import math
-import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.utils.model_zoo as model_zoo
-from torch.nn import init
 
-__all__ = ['xception']
+__all__ = ["xception"]
 
 pretrained_settings = {
-    'xception': {
-        'imagenet': {
-            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth',
-            'input_space': 'RGB',
-            'input_size': [3, 299, 299],
-            'input_range': [0, 1],
-            'mean': [0.5, 0.5, 0.5],
-            'std': [0.5, 0.5, 0.5],
-            'num_classes': 1000,
-            'scale': 0.8975 # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299
+    "xception": {
+        "imagenet": {
+            "url": "http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth",
+            "input_space": "RGB",
+            "input_size": [3, 299, 299],
+            "input_range": [0, 1],
+            "mean": [0.5, 0.5, 0.5],
+            "std": [0.5, 0.5, 0.5],
+            "num_classes": 1000,
+            "scale": 0.8975,  # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299
         }
     }
 }
 
 
 class SeparableConv2d(nn.Module):
-    def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
-        super(SeparableConv2d,self).__init__()
-
-        self.conv1 = nn.Conv2d(in_channels,in_channels,kernel_size,stride,padding,dilation,groups=in_channels,bias=bias)
-        self.pointwise = nn.Conv2d(in_channels,out_channels,1,1,0,1,1,bias=bias)
-
-    def forward(self,x):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size=1,
+        stride=1,
+        padding=0,
+        dilation=1,
+        bias=False,
+    ):
+        super(SeparableConv2d, self).__init__()
+
+        self.conv1 = nn.Conv2d(
+            in_channels,
+            in_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups=in_channels,
+            bias=bias,
+        )
+        self.pointwise = nn.Conv2d(in_channels, out_channels, 1, 1, 0, 1, 1, bias=bias)
+
+    def forward(self, x):
         x = self.conv1(x)
         x = self.pointwise(x)
         return x
 
 
 class Block(nn.Module):
-    def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
+    def __init__(
+        self,
+        in_filters,
+        out_filters,
+        reps,
+        strides=1,
+        start_with_relu=True,
+        grow_first=True,
+    ):
         super(Block, self).__init__()
 
-        if out_filters != in_filters or strides!=1:
-            self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False)
+        if out_filters != in_filters or strides != 1:
+            self.skip = nn.Conv2d(
+                in_filters, out_filters, 1, stride=strides, bias=False
+            )
             self.skipbn = nn.BatchNorm2d(out_filters)
         else:
-            self.skip=None
+            self.skip = None
 
-        rep=[]
+        rep = []
 
-        filters=in_filters
+        filters = in_filters
         if grow_first:
             rep.append(nn.ReLU(inplace=True))
-            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
+            rep.append(
+                SeparableConv2d(
+                    in_filters, out_filters, 3, stride=1, padding=1, bias=False
+                )
+            )
             rep.append(nn.BatchNorm2d(out_filters))
             filters = out_filters
 
-        for i in range(reps-1):
+        for i in range(reps - 1):
             rep.append(nn.ReLU(inplace=True))
-            rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False))
+            rep.append(
+                SeparableConv2d(filters, filters, 3, stride=1, padding=1, bias=False)
+            )
             rep.append(nn.BatchNorm2d(filters))
 
         if not grow_first:
             rep.append(nn.ReLU(inplace=True))
-            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
+            rep.append(
+                SeparableConv2d(
+                    in_filters, out_filters, 3, stride=1, padding=1, bias=False
+                )
+            )
             rep.append(nn.BatchNorm2d(out_filters))
 
         if not start_with_relu:
@@ -95,10 +131,10 @@ def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,gro
             rep[0] = nn.ReLU(inplace=False)
 
         if strides != 1:
-            rep.append(nn.MaxPool2d(3,strides,1))
+            rep.append(nn.MaxPool2d(3, strides, 1))
         self.rep = nn.Sequential(*rep)
 
-    def forward(self,inp):
+    def forward(self, inp):
         x = self.rep(inp)
 
         if self.skip is not None:
@@ -107,7 +143,7 @@ def forward(self,inp):
         else:
             skip = inp
 
-        x+=skip
+        x += skip
         return x
 
 
@@ -116,45 +152,46 @@ class Xception(nn.Module):
     Xception optimized for the ImageNet dataset, as specified in
     https://arxiv.org/pdf/1610.02357.pdf
     """
+
     def __init__(self, num_classes=1000):
-        """ Constructor
+        """Constructor
         Args:
             num_classes: number of classes
         """
         super(Xception, self).__init__()
         self.num_classes = num_classes
 
-        self.conv1 = nn.Conv2d(3, 32, 3,2, 0, bias=False)
+        self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=False)
         self.bn1 = nn.BatchNorm2d(32)
         self.relu1 = nn.ReLU(inplace=True)
 
-        self.conv2 = nn.Conv2d(32,64,3,bias=False)
+        self.conv2 = nn.Conv2d(32, 64, 3, bias=False)
         self.bn2 = nn.BatchNorm2d(64)
         self.relu2 = nn.ReLU(inplace=True)
-        #do relu here
+        # do relu here
 
-        self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True)
-        self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True)
-        self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True)
+        self.block1 = Block(64, 128, 2, 2, start_with_relu=False, grow_first=True)
+        self.block2 = Block(128, 256, 2, 2, start_with_relu=True, grow_first=True)
+        self.block3 = Block(256, 728, 2, 2, start_with_relu=True, grow_first=True)
 
-        self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True)
-        self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True)
-        self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True)
-        self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True)
+        self.block4 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
+        self.block5 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
+        self.block6 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
+        self.block7 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
 
-        self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True)
-        self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True)
-        self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True)
-        self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True)
+        self.block8 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
+        self.block9 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
+        self.block10 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
+        self.block11 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
 
-        self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False)
+        self.block12 = Block(728, 1024, 2, 2, start_with_relu=True, grow_first=False)
 
-        self.conv3 = SeparableConv2d(1024,1536,3,1,1)
+        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
         self.bn3 = nn.BatchNorm2d(1536)
         self.relu3 = nn.ReLU(inplace=True)
 
-        #do relu here
-        self.conv4 = SeparableConv2d(1536,2048,3,1,1)
+        # do relu here
+        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
         self.bn4 = nn.BatchNorm2d(2048)
 
         self.fc = nn.Linear(2048, num_classes)
@@ -213,21 +250,24 @@ def forward(self, input):
         return x
 
 
-def xception(num_classes=1000, pretrained='imagenet'):
+def xception(num_classes=1000, pretrained="imagenet"):
     model = Xception(num_classes=num_classes)
     if pretrained:
-        settings = pretrained_settings['xception'][pretrained]
-        assert num_classes == settings['num_classes'], \
-            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
+        settings = pretrained_settings["xception"][pretrained]
+        assert num_classes == settings["num_classes"], (
+            "num_classes should be {}, but is {}".format(
+                settings["num_classes"], num_classes
+            )
+        )
 
         model = Xception(num_classes=num_classes)
-        model.load_state_dict(model_zoo.load_url(settings['url']))
+        model.load_state_dict(model_zoo.load_url(settings["url"]))
 
-        model.input_space = settings['input_space']
-        model.input_size = settings['input_size']
-        model.input_range = settings['input_range']
-        model.mean = settings['mean']
-        model.std = settings['std']
+        model.input_space = settings["input_space"]
+        model.input_size = settings["input_size"]
+        model.input_range = settings["input_range"]
+        model.mean = settings["mean"]
+        model.std = settings["std"]
 
     # TODO: ugly
     model.last_linear = model.fc
diff --git a/segmentation_models_pytorch/encoders/senet.py b/segmentation_models_pytorch/encoders/senet.py
index 03cf0820..bd52fb4b 100644
--- a/segmentation_models_pytorch/encoders/senet.py
+++ b/segmentation_models_pytorch/encoders/senet.py
@@ -29,9 +29,6 @@
 from ._base import EncoderMixin
 from ._senet import (
     SENet,
-    SEBottleneck,
-    SEResNetBottleneck,
-    SEResNeXtBottleneck,
 )