From 3635e190cc2f38d9335df3e45211c71fc4f46482 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Wed, 15 Jan 2025 18:13:19 +0100 Subject: [PATCH 1/4] Vendor efficientnet-pytorch --- docs/conf.py | 1 - licenses/LICENSES.md | 3 + licenses/LICENSE_apache.md | 202 +++ pyproject.toml | 1 - requirements/minimum.old | 1 - requirements/required.txt | 1 - .../encoders/_efficientnet.py | 1222 +++++++++++++++++ .../encoders/efficientnet.py | 4 +- 8 files changed, 1428 insertions(+), 7 deletions(-) create mode 100644 licenses/LICENSE_apache.md create mode 100644 segmentation_models_pytorch/encoders/_efficientnet.py diff --git a/docs/conf.py b/docs/conf.py index c7dde9e5..82583c6b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -102,7 +102,6 @@ def get_version(): "PIL", "pretrainedmodels", "torchvision", - "efficientnet-pytorch", "segmentation_models_pytorch.encoders", "segmentation_models_pytorch.utils", # 'segmentation_models_pytorch.base', diff --git a/licenses/LICENSES.md b/licenses/LICENSES.md index 670764a2..06f36241 100644 --- a/licenses/LICENSES.md +++ b/licenses/LICENSES.md @@ -23,3 +23,6 @@ The majority of the code is licensed under the [MIT License](LICENSE). However, * Applies to the DeepLabV3 decoder * [segmentation_models_pytorch/decoders/deeplabv3/decoder.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/decoders/deeplabv3/decoder.py) +- Apache-2.0 License + * Applies to the EfficientNet encoder + * [segmentation_models_pytorch/encoders/_efficientnet.py](https://github.com/qubvel/segmentation_models.pytorch/blob/main/segmentation_models_pytorch/encoders/_efficientnet.py) diff --git a/licenses/LICENSE_apache.md b/licenses/LICENSE_apache.md new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/licenses/LICENSE_apache.md @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/pyproject.toml b/pyproject.toml index c7cf1958..645ec369 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ classifiers = [ 'Programming Language :: Python :: Implementation :: PyPy', ] dependencies = [ - 'efficientnet-pytorch>=0.6.1', 'huggingface-hub>=0.24', 'numpy>=1.19.3', 'pillow>=8', diff --git a/requirements/minimum.old b/requirements/minimum.old index 1080bdb4..40bdc6ce 100644 --- a/requirements/minimum.old +++ b/requirements/minimum.old @@ -1,4 +1,3 @@ -efficientnet-pytorch==0.6.1 huggingface-hub==0.24.0 numpy==1.19.3 pillow==8.0.0 diff --git a/requirements/required.txt b/requirements/required.txt index 988d496c..220e2ab9 100644 --- a/requirements/required.txt +++ b/requirements/required.txt @@ -1,4 +1,3 @@ -efficientnet-pytorch==0.7.1 huggingface_hub==0.27.1 numpy==2.2.1 pillow==11.1.0 diff --git a/segmentation_models_pytorch/encoders/_efficientnet.py b/segmentation_models_pytorch/encoders/_efficientnet.py new file mode 100644 index 00000000..860b17bb --- /dev/null +++ b/segmentation_models_pytorch/encoders/_efficientnet.py @@ -0,0 +1,1222 @@ +"""model.py - Model and module class for EfficientNet. +They are built to mirror those in the official TensorFlow implementation. +""" + +# Author: lukemelas (github username) +# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch +# With adjustments and added comments by workingcoder (github username). + +import torch +from torch import nn +from torch.nn import functional as F +import re +import math +import collections +from functools import partial +from torch.utils import model_zoo + + +VALID_MODELS = ( + "efficientnet-b0", + "efficientnet-b1", + "efficientnet-b2", + "efficientnet-b3", + "efficientnet-b4", + "efficientnet-b5", + "efficientnet-b6", + "efficientnet-b7", + "efficientnet-b8", + # Support the construction of 'efficientnet-l2' without pretrained weights + "efficientnet-l2", +) + + +class MBConvBlock(nn.Module): + """Mobile Inverted Residual Bottleneck Block. + + Args: + block_args (namedtuple): BlockArgs, defined in utils.py. + global_params (namedtuple): GlobalParam, defined in utils.py. + image_size (tuple or list): [image_height, image_width]. + + References: + [1] https://arxiv.org/abs/1704.04861 (MobileNet v1) + [2] https://arxiv.org/abs/1801.04381 (MobileNet v2) + [3] https://arxiv.org/abs/1905.02244 (MobileNet v3) + """ + + def __init__(self, block_args, global_params, image_size=None): + super().__init__() + self._block_args = block_args + self._bn_mom = ( + 1 - global_params.batch_norm_momentum + ) # pytorch's difference from tensorflow + self._bn_eps = global_params.batch_norm_epsilon + self.has_se = (self._block_args.se_ratio is not None) and ( + 0 < self._block_args.se_ratio <= 1 + ) + self.id_skip = ( + block_args.id_skip + ) # whether to use skip connection and drop connect + + # Expansion phase (Inverted Bottleneck) + inp = self._block_args.input_filters # number of input channels + oup = ( + self._block_args.input_filters * self._block_args.expand_ratio + ) # number of output channels + if self._block_args.expand_ratio != 1: + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._expand_conv = Conv2d( + in_channels=inp, out_channels=oup, kernel_size=1, bias=False + ) + self._bn0 = nn.BatchNorm2d( + num_features=oup, momentum=self._bn_mom, eps=self._bn_eps + ) + # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size + + # Depthwise convolution phase + k = self._block_args.kernel_size + s = self._block_args.stride + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._depthwise_conv = Conv2d( + in_channels=oup, + out_channels=oup, + groups=oup, # groups makes it depthwise + kernel_size=k, + stride=s, + bias=False, + ) + self._bn1 = nn.BatchNorm2d( + num_features=oup, momentum=self._bn_mom, eps=self._bn_eps + ) + image_size = calculate_output_image_size(image_size, s) + + # Squeeze and Excitation layer, if desired + if self.has_se: + Conv2d = get_same_padding_conv2d(image_size=(1, 1)) + num_squeezed_channels = max( + 1, int(self._block_args.input_filters * self._block_args.se_ratio) + ) + self._se_reduce = Conv2d( + in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1 + ) + self._se_expand = Conv2d( + in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1 + ) + + # Pointwise convolution phase + final_oup = self._block_args.output_filters + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._project_conv = Conv2d( + in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False + ) + self._bn2 = nn.BatchNorm2d( + num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps + ) + self._swish = MemoryEfficientSwish() + + def forward(self, inputs, drop_connect_rate=None): + """MBConvBlock's forward function. + + Args: + inputs (tensor): Input tensor. + drop_connect_rate (bool): Drop connect rate (float, between 0 and 1). + + Returns: + Output of this block after processing. + """ + + # Expansion and Depthwise Convolution + x = inputs + if self._block_args.expand_ratio != 1: + x = self._expand_conv(inputs) + x = self._bn0(x) + x = self._swish(x) + + x = self._depthwise_conv(x) + x = self._bn1(x) + x = self._swish(x) + + # Squeeze and Excitation + if self.has_se: + x_squeezed = F.adaptive_avg_pool2d(x, 1) + x_squeezed = self._se_reduce(x_squeezed) + x_squeezed = self._swish(x_squeezed) + x_squeezed = self._se_expand(x_squeezed) + x = torch.sigmoid(x_squeezed) * x + + # Pointwise Convolution + x = self._project_conv(x) + x = self._bn2(x) + + # Skip connection and drop connect + input_filters, output_filters = ( + self._block_args.input_filters, + self._block_args.output_filters, + ) + if ( + self.id_skip + and self._block_args.stride == 1 + and input_filters == output_filters + ): + # The combination of skip connection and drop connect brings about stochastic depth. + if drop_connect_rate: + x = drop_connect(x, p=drop_connect_rate, training=self.training) + x = x + inputs # skip connection + return x + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export). + + Args: + memory_efficient (bool): Whether to use memory-efficient version of swish. + """ + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + + +class EfficientNet(nn.Module): + """EfficientNet model. + Most easily loaded with the .from_name or .from_pretrained methods. + + Args: + blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks. + global_params (namedtuple): A set of GlobalParams shared between blocks. + + References: + [1] https://arxiv.org/abs/1905.11946 (EfficientNet) + + Example: + >>> import torch + >>> from efficientnet.model import EfficientNet + >>> inputs = torch.rand(1, 3, 224, 224) + >>> model = EfficientNet.from_pretrained('efficientnet-b0') + >>> model.eval() + >>> outputs = model(inputs) + """ + + def __init__(self, blocks_args=None, global_params=None): + super().__init__() + assert isinstance(blocks_args, list), "blocks_args should be a list" + assert len(blocks_args) > 0, "block args must be greater than 0" + self._global_params = global_params + self._blocks_args = blocks_args + + # Batch norm parameters + bn_mom = 1 - self._global_params.batch_norm_momentum + bn_eps = self._global_params.batch_norm_epsilon + + # Get stem static or dynamic convolution depending on image size + image_size = global_params.image_size + Conv2d = get_same_padding_conv2d(image_size=image_size) + + # Stem + in_channels = 3 # rgb + out_channels = round_filters( + 32, self._global_params + ) # number of output channels + self._conv_stem = Conv2d( + in_channels, out_channels, kernel_size=3, stride=2, bias=False + ) + self._bn0 = nn.BatchNorm2d( + num_features=out_channels, momentum=bn_mom, eps=bn_eps + ) + image_size = calculate_output_image_size(image_size, 2) + + # Build blocks + self._blocks = nn.ModuleList([]) + for block_args in self._blocks_args: + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters( + block_args.input_filters, self._global_params + ), + output_filters=round_filters( + block_args.output_filters, self._global_params + ), + num_repeat=round_repeats(block_args.num_repeat, self._global_params), + ) + + # The first block needs to take care of stride and filter size increase. + self._blocks.append( + MBConvBlock(block_args, self._global_params, image_size=image_size) + ) + image_size = calculate_output_image_size(image_size, block_args.stride) + if block_args.num_repeat > 1: # modify block_args to keep same output size + block_args = block_args._replace( + input_filters=block_args.output_filters, stride=1 + ) + for _ in range(block_args.num_repeat - 1): + self._blocks.append( + MBConvBlock(block_args, self._global_params, image_size=image_size) + ) + # image_size = calculate_output_image_size(image_size, block_args.stride) # stride = 1 + + # Head + in_channels = block_args.output_filters # output of final block + out_channels = round_filters(1280, self._global_params) + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) + self._bn1 = nn.BatchNorm2d( + num_features=out_channels, momentum=bn_mom, eps=bn_eps + ) + + # Final linear layer + self._avg_pooling = nn.AdaptiveAvgPool2d(1) + if self._global_params.include_top: + self._dropout = nn.Dropout(self._global_params.dropout_rate) + self._fc = nn.Linear(out_channels, self._global_params.num_classes) + + # set activation to memory efficient swish by default + self._swish = MemoryEfficientSwish() + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export). + + Args: + memory_efficient (bool): Whether to use memory-efficient version of swish. + """ + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + for block in self._blocks: + block.set_swish(memory_efficient) + + def extract_endpoints(self, inputs): + """Use convolution layer to extract features + from reduction levels i in [1, 2, 3, 4, 5]. + + Args: + inputs (tensor): Input tensor. + + Returns: + Dictionary of last intermediate features + with reduction levels i in [1, 2, 3, 4, 5]. + Example: + >>> import torch + >>> from efficientnet.model import EfficientNet + >>> inputs = torch.rand(1, 3, 224, 224) + >>> model = EfficientNet.from_pretrained('efficientnet-b0') + >>> endpoints = model.extract_endpoints(inputs) + >>> print(endpoints['reduction_1'].shape) # torch.Size([1, 16, 112, 112]) + >>> print(endpoints['reduction_2'].shape) # torch.Size([1, 24, 56, 56]) + >>> print(endpoints['reduction_3'].shape) # torch.Size([1, 40, 28, 28]) + >>> print(endpoints['reduction_4'].shape) # torch.Size([1, 112, 14, 14]) + >>> print(endpoints['reduction_5'].shape) # torch.Size([1, 320, 7, 7]) + >>> print(endpoints['reduction_6'].shape) # torch.Size([1, 1280, 7, 7]) + """ + endpoints = dict() + + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + prev_x = x + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len( + self._blocks + ) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + if prev_x.size(2) > x.size(2): + endpoints["reduction_{}".format(len(endpoints) + 1)] = prev_x + elif idx == len(self._blocks) - 1: + endpoints["reduction_{}".format(len(endpoints) + 1)] = x + prev_x = x + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + endpoints["reduction_{}".format(len(endpoints) + 1)] = x + + return endpoints + + def extract_features(self, inputs): + """use convolution layer to extract feature . + + Args: + inputs (tensor): Input tensor. + + Returns: + Output of the final convolution + layer in the efficientnet model. + """ + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len( + self._blocks + ) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + + return x + + def forward(self, inputs): + """EfficientNet's forward function. + Calls extract_features to extract features, applies final linear layer, and returns logits. + + Args: + inputs (tensor): Input tensor. + + Returns: + Output of this model after processing. + """ + # Convolution layers + x = self.extract_features(inputs) + # Pooling and final linear layer + x = self._avg_pooling(x) + if self._global_params.include_top: + x = x.flatten(start_dim=1) + x = self._dropout(x) + x = self._fc(x) + return x + + @classmethod + def from_name(cls, model_name, in_channels=3, **override_params): + """Create an efficientnet model according to name. + + Args: + model_name (str): Name for efficientnet. + in_channels (int): Input data's channel number. + override_params (other key word params): + Params to override model's global_params. + Optional key: + 'width_coefficient', 'depth_coefficient', + 'image_size', 'dropout_rate', + 'num_classes', 'batch_norm_momentum', + 'batch_norm_epsilon', 'drop_connect_rate', + 'depth_divisor', 'min_depth' + + Returns: + An efficientnet model. + """ + cls._check_model_name_is_valid(model_name) + blocks_args, global_params = get_model_params(model_name, override_params) + model = cls(blocks_args, global_params) + model._change_in_channels(in_channels) + return model + + @classmethod + def from_pretrained( + cls, + model_name, + weights_path=None, + advprop=False, + in_channels=3, + num_classes=1000, + **override_params, + ): + """Create an efficientnet model according to name. + + Args: + model_name (str): Name for efficientnet. + weights_path (None or str): + str: path to pretrained weights file on the local disk. + None: use pretrained weights downloaded from the Internet. + advprop (bool): + Whether to load pretrained weights + trained with advprop (valid when weights_path is None). + in_channels (int): Input data's channel number. + num_classes (int): + Number of categories for classification. + It controls the output size for final linear layer. + override_params (other key word params): + Params to override model's global_params. + Optional key: + 'width_coefficient', 'depth_coefficient', + 'image_size', 'dropout_rate', + 'batch_norm_momentum', + 'batch_norm_epsilon', 'drop_connect_rate', + 'depth_divisor', 'min_depth' + + Returns: + A pretrained efficientnet model. + """ + model = cls.from_name(model_name, num_classes=num_classes, **override_params) + load_pretrained_weights( + model, + model_name, + weights_path=weights_path, + load_fc=(num_classes == 1000), + advprop=advprop, + ) + model._change_in_channels(in_channels) + return model + + @classmethod + def get_image_size(cls, model_name): + """Get the input image size for a given efficientnet model. + + Args: + model_name (str): Name for efficientnet. + + Returns: + Input image size (resolution). + """ + cls._check_model_name_is_valid(model_name) + _, _, res, _ = efficientnet_params(model_name) + return res + + @classmethod + def _check_model_name_is_valid(cls, model_name): + """Validates model name. + + Args: + model_name (str): Name for efficientnet. + + Returns: + bool: Is a valid name or not. + """ + if model_name not in VALID_MODELS: + raise ValueError("model_name should be one of: " + ", ".join(VALID_MODELS)) + + def _change_in_channels(self, in_channels): + """Adjust model's first convolution layer to in_channels, if in_channels not equals 3. + + Args: + in_channels (int): Input data's channel number. + """ + if in_channels != 3: + Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size) + out_channels = round_filters(32, self._global_params) + self._conv_stem = Conv2d( + in_channels, out_channels, kernel_size=3, stride=2, bias=False + ) + + +"""utils.py - Helper functions for building the model and for loading model parameters. + These helper functions are built to mirror those in the official TensorFlow implementation. +""" + +# Author: lukemelas (github username) +# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch +# With adjustments and added comments by workingcoder (github username). + + +################################################################################ +# Help functions for model architecture +################################################################################ + +# GlobalParams and BlockArgs: Two namedtuples +# Swish and MemoryEfficientSwish: Two implementations of the method +# round_filters and round_repeats: +# Functions to calculate params for scaling model width and depth ! ! ! +# get_width_and_height_from_size and calculate_output_image_size +# drop_connect: A structural design +# get_same_padding_conv2d: +# Conv2dDynamicSamePadding +# Conv2dStaticSamePadding +# get_same_padding_maxPool2d: +# MaxPool2dDynamicSamePadding +# MaxPool2dStaticSamePadding +# It's an additional function, not used in EfficientNet, +# but can be used in other model (such as EfficientDet). + +# Parameters for the entire model (stem, all blocks, and head) +GlobalParams = collections.namedtuple( + "GlobalParams", + [ + "width_coefficient", + "depth_coefficient", + "image_size", + "dropout_rate", + "num_classes", + "batch_norm_momentum", + "batch_norm_epsilon", + "drop_connect_rate", + "depth_divisor", + "min_depth", + "include_top", + ], +) + +# Parameters for an individual model block +BlockArgs = collections.namedtuple( + "BlockArgs", + [ + "num_repeat", + "kernel_size", + "stride", + "expand_ratio", + "input_filters", + "output_filters", + "se_ratio", + "id_skip", + ], +) + +# Set GlobalParams and BlockArgs's defaults +GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) + +# Swish activation function +if hasattr(nn, "SiLU"): + Swish = nn.SiLU +else: + # For compatibility with old PyTorch versions + class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +# A memory-efficient implementation of Swish function +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, i): + result = i * torch.sigmoid(i) + ctx.save_for_backward(i) + return result + + @staticmethod + def backward(ctx, grad_output): + i = ctx.saved_tensors[0] + sigmoid_i = torch.sigmoid(i) + return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) + + +class MemoryEfficientSwish(nn.Module): + def forward(self, x): + return SwishImplementation.apply(x) + + +def round_filters(filters, global_params): + """Calculate and round number of filters based on width multiplier. + Use width_coefficient, depth_divisor and min_depth of global_params. + + Args: + filters (int): Filters number to be calculated. + global_params (namedtuple): Global params of the model. + + Returns: + new_filters: New filters number after calculating. + """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + # TODO: modify the params names. + # maybe the names (width_divisor,min_width) + # are more suitable than (depth_divisor,min_depth). + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor # pay attention to this line when using min_depth + # follow the formula transferred from official TensorFlow implementation + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """Calculate module's repeat number of a block based on depth multiplier. + Use depth_coefficient of global_params. + + Args: + repeats (int): num_repeat to be calculated. + global_params (namedtuple): Global params of the model. + + Returns: + new repeat: New repeat number after calculating. + """ + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + # follow the formula transferred from official TensorFlow implementation + return int(math.ceil(multiplier * repeats)) + + +def drop_connect(inputs, p, training): + """Drop connect. + + Args: + input (tensor: BCWH): Input of this structure. + p (float: 0.0~1.0): Probability of drop connection. + training (bool): The running mode. + + Returns: + output: Output after drop connection. + """ + assert 0 <= p <= 1, "p must be in range of [0,1]" + + if not training: + return inputs + + batch_size = inputs.shape[0] + keep_prob = 1 - p + + # generate binary_tensor mask according to probability (p for 0, 1-p for 1) + random_tensor = keep_prob + random_tensor += torch.rand( + [batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device + ) + binary_tensor = torch.floor(random_tensor) + + output = inputs / keep_prob * binary_tensor + return output + + +def get_width_and_height_from_size(x): + """Obtain height and width from x. + + Args: + x (int, tuple or list): Data size. + + Returns: + size: A tuple or list (H,W). + """ + if isinstance(x, int): + return x, x + if isinstance(x, list) or isinstance(x, tuple): + return x + else: + raise TypeError() + + +def calculate_output_image_size(input_image_size, stride): + """Calculates the output image size when using Conv2dSamePadding with a stride. + Necessary for static padding. Thanks to mannatsingh for pointing this out. + + Args: + input_image_size (int, tuple or list): Size of input image. + stride (int, tuple or list): Conv2d operation's stride. + + Returns: + output_image_size: A list [H,W]. + """ + if input_image_size is None: + return None + image_height, image_width = get_width_and_height_from_size(input_image_size) + stride = stride if isinstance(stride, int) else stride[0] + image_height = int(math.ceil(image_height / stride)) + image_width = int(math.ceil(image_width / stride)) + return [image_height, image_width] + + +# Note: +# The following 'SamePadding' functions make output size equal ceil(input size/stride). +# Only when stride equals 1, can the output size be the same as input size. +# Don't be confused by their function names ! ! ! + + +def get_same_padding_conv2d(image_size=None): + """Chooses static padding if you have specified an image size, and dynamic padding otherwise. + Static padding is necessary for ONNX exporting of models. + + Args: + image_size (int or tuple): Size of the image. + + Returns: + Conv2dDynamicSamePadding or Conv2dStaticSamePadding. + """ + if image_size is None: + return Conv2dDynamicSamePadding + else: + return partial(Conv2dStaticSamePadding, image_size=image_size) + + +class Conv2dDynamicSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow, for a dynamic image size. + The padding is operated in forward function by calculating dynamically. + """ + + # Tips for 'SAME' mode padding. + # Given the following: + # i: width or height + # s: stride + # k: kernel size + # d: dilation + # p: padding + # Output after Conv2d: + # o = floor((i+p-((k-1)*d+1))/s+1) + # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1), + # => p = (i-1)*s+((k-1)*d+1)-i + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + dilation=1, + groups=1, + bias=True, + ): + super().__init__( + in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias + ) + self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = ( + math.ceil(ih / sh), + math.ceil(iw / sw), + ) # change the output size according to stride ! ! ! + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + x = F.pad( + x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] + ) + return F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + + +class Conv2dStaticSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size. + The padding mudule is calculated in construction function, then used in forward. + """ + + # With the same calculation as Conv2dDynamicSamePadding + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + image_size=None, + **kwargs, + ): + super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs) + self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + self.static_padding = nn.ZeroPad2d( + (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2) + ) + else: + self.static_padding = nn.Identity() + + def forward(self, x): + x = self.static_padding(x) + x = F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + return x + + +def get_same_padding_maxPool2d(image_size=None): + """Chooses static padding if you have specified an image size, and dynamic padding otherwise. + Static padding is necessary for ONNX exporting of models. + + Args: + image_size (int or tuple): Size of the image. + + Returns: + MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding. + """ + if image_size is None: + return MaxPool2dDynamicSamePadding + else: + return partial(MaxPool2dStaticSamePadding, image_size=image_size) + + +class MaxPool2dDynamicSamePadding(nn.MaxPool2d): + """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size. + The padding is operated in forward function by calculating dynamically. + """ + + def __init__( + self, + kernel_size, + stride, + padding=0, + dilation=1, + return_indices=False, + ceil_mode=False, + ): + super().__init__( + kernel_size, stride, padding, dilation, return_indices, ceil_mode + ) + self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride + self.kernel_size = ( + [self.kernel_size] * 2 + if isinstance(self.kernel_size, int) + else self.kernel_size + ) + self.dilation = ( + [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation + ) + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.kernel_size + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + x = F.pad( + x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] + ) + return F.max_pool2d( + x, + self.kernel_size, + self.stride, + self.padding, + self.dilation, + self.ceil_mode, + self.return_indices, + ) + + +class MaxPool2dStaticSamePadding(nn.MaxPool2d): + """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size. + The padding mudule is calculated in construction function, then used in forward. + """ + + def __init__(self, kernel_size, stride, image_size=None, **kwargs): + super().__init__(kernel_size, stride, **kwargs) + self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride + self.kernel_size = ( + [self.kernel_size] * 2 + if isinstance(self.kernel_size, int) + else self.kernel_size + ) + self.dilation = ( + [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation + ) + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size + kh, kw = self.kernel_size + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + self.static_padding = nn.ZeroPad2d( + (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2) + ) + else: + self.static_padding = nn.Identity() + + def forward(self, x): + x = self.static_padding(x) + x = F.max_pool2d( + x, + self.kernel_size, + self.stride, + self.padding, + self.dilation, + self.ceil_mode, + self.return_indices, + ) + return x + + +################################################################################ +# Helper functions for loading model params +################################################################################ + +# BlockDecoder: A Class for encoding and decoding BlockArgs +# efficientnet_params: A function to query compound coefficient +# get_model_params and efficientnet: +# Functions to get BlockArgs and GlobalParams for efficientnet +# url_map and url_map_advprop: Dicts of url_map for pretrained weights +# load_pretrained_weights: A function to load pretrained weights + + +class BlockDecoder(object): + """Block Decoder for readability, + straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string): + """Get a block through a string notation of arguments. + + Args: + block_string (str): A string notation of arguments. + Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'. + + Returns: + BlockArgs: The namedtuple defined at the top of this file. + """ + assert isinstance(block_string, str) + + ops = block_string.split("_") + options = {} + for op in ops: + splits = re.split(r"(\d.*)", op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + assert ("s" in options and len(options["s"]) == 1) or ( + len(options["s"]) == 2 and options["s"][0] == options["s"][1] + ) + + return BlockArgs( + num_repeat=int(options["r"]), + kernel_size=int(options["k"]), + stride=[int(options["s"][0])], + expand_ratio=int(options["e"]), + input_filters=int(options["i"]), + output_filters=int(options["o"]), + se_ratio=float(options["se"]) if "se" in options else None, + id_skip=("noskip" not in block_string), + ) + + @staticmethod + def _encode_block_string(block): + """Encode a block to a string. + + Args: + block (namedtuple): A BlockArgs type argument. + + Returns: + block_string: A String form of BlockArgs. + """ + args = [ + "r%d" % block.num_repeat, + "k%d" % block.kernel_size, + "s%d%d" % (block.strides[0], block.strides[1]), + "e%s" % block.expand_ratio, + "i%d" % block.input_filters, + "o%d" % block.output_filters, + ] + if 0 < block.se_ratio <= 1: + args.append("se%s" % block.se_ratio) + if block.id_skip is False: + args.append("noskip") + return "_".join(args) + + @staticmethod + def decode(string_list): + """Decode a list of string notations to specify blocks inside the network. + + Args: + string_list (list[str]): A list of strings, each string is a notation of block. + + Returns: + blocks_args: A list of BlockArgs namedtuples of block args. + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """Encode a list of BlockArgs to a list of strings. + + Args: + blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args. + + Returns: + block_strings: A list of strings, each string is a notation of block. + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def efficientnet_params(model_name): + """Map EfficientNet model name to parameter coefficients. + + Args: + model_name (str): Model name to be queried. + + Returns: + params_dict[model_name]: A (width,depth,res,dropout) tuple. + """ + params_dict = { + # Coefficients: width,depth,res,dropout + "efficientnet-b0": (1.0, 1.0, 224, 0.2), + "efficientnet-b1": (1.0, 1.1, 240, 0.2), + "efficientnet-b2": (1.1, 1.2, 260, 0.3), + "efficientnet-b3": (1.2, 1.4, 300, 0.3), + "efficientnet-b4": (1.4, 1.8, 380, 0.4), + "efficientnet-b5": (1.6, 2.2, 456, 0.4), + "efficientnet-b6": (1.8, 2.6, 528, 0.5), + "efficientnet-b7": (2.0, 3.1, 600, 0.5), + "efficientnet-b8": (2.2, 3.6, 672, 0.5), + "efficientnet-l2": (4.3, 5.3, 800, 0.5), + } + return params_dict[model_name] + + +def efficientnet( + width_coefficient=None, + depth_coefficient=None, + image_size=None, + dropout_rate=0.2, + drop_connect_rate=0.2, + num_classes=1000, + include_top=True, +): + """Create BlockArgs and GlobalParams for efficientnet model. + + Args: + width_coefficient (float) + depth_coefficient (float) + image_size (int) + dropout_rate (float) + drop_connect_rate (float) + num_classes (int) + + Meaning as the name suggests. + + Returns: + blocks_args, global_params. + """ + + # Blocks args for the whole model(efficientnet-b0 by default) + # It will be modified in the construction of EfficientNet Class according to model + blocks_args = [ + "r1_k3_s11_e1_i32_o16_se0.25", + "r2_k3_s22_e6_i16_o24_se0.25", + "r2_k5_s22_e6_i24_o40_se0.25", + "r3_k3_s22_e6_i40_o80_se0.25", + "r3_k5_s11_e6_i80_o112_se0.25", + "r4_k5_s22_e6_i112_o192_se0.25", + "r1_k3_s11_e6_i192_o320_se0.25", + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + image_size=image_size, + dropout_rate=dropout_rate, + num_classes=num_classes, + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + drop_connect_rate=drop_connect_rate, + depth_divisor=8, + min_depth=None, + include_top=include_top, + ) + + return blocks_args, global_params + + +def get_model_params(model_name, override_params): + """Get the block args and global params for a given model name. + + Args: + model_name (str): Model's name. + override_params (dict): A dict to modify global_params. + + Returns: + blocks_args, global_params + """ + if model_name.startswith("efficientnet"): + w, d, s, p = efficientnet_params(model_name) + # note: all models have drop connect rate = 0.2 + blocks_args, global_params = efficientnet( + width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s + ) + else: + raise NotImplementedError( + "model name is not pre-defined: {}".format(model_name) + ) + if override_params: + # ValueError will be raised here if override_params has fields not included in global_params. + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +# train with Standard methods +# check more details in paper(EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks) +url_map = { + "efficientnet-b0": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth", + "efficientnet-b1": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth", + "efficientnet-b2": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth", + "efficientnet-b3": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth", + "efficientnet-b4": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth", + "efficientnet-b5": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth", + "efficientnet-b6": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth", + "efficientnet-b7": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth", +} + +# train with Adversarial Examples(AdvProp) +# check more details in paper(Adversarial Examples Improve Image Recognition) +url_map_advprop = { + "efficientnet-b0": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth", + "efficientnet-b1": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth", + "efficientnet-b2": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth", + "efficientnet-b3": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth", + "efficientnet-b4": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth", + "efficientnet-b5": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth", + "efficientnet-b6": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth", + "efficientnet-b7": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth", + "efficientnet-b8": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth", +} + +# TODO: add the petrained weights url map of 'efficientnet-l2' + + +def load_pretrained_weights( + model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True +): + """Loads pretrained weights from weights path or download using url. + + Args: + model (Module): The whole model of efficientnet. + model_name (str): Model name of efficientnet. + weights_path (None or str): + str: path to pretrained weights file on the local disk. + None: use pretrained weights downloaded from the Internet. + load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model. + advprop (bool): Whether to load pretrained weights + trained with advprop (valid when weights_path is None). + """ + if isinstance(weights_path, str): + state_dict = torch.load(weights_path) + else: + # AutoAugment or Advprop (different preprocessing) + url_map_ = url_map_advprop if advprop else url_map + state_dict = model_zoo.load_url(url_map_[model_name]) + + if load_fc: + ret = model.load_state_dict(state_dict, strict=False) + assert not ret.missing_keys, ( + "Missing keys when loading pretrained weights: {}".format(ret.missing_keys) + ) + else: + state_dict.pop("_fc.weight") + state_dict.pop("_fc.bias") + ret = model.load_state_dict(state_dict, strict=False) + assert set(ret.missing_keys) == set(["_fc.weight", "_fc.bias"]), ( + "Missing keys when loading pretrained weights: {}".format(ret.missing_keys) + ) + assert not ret.unexpected_keys, ( + "Missing keys when loading pretrained weights: {}".format(ret.unexpected_keys) + ) + + if verbose: + print("Loaded pretrained weights for {}".format(model_name)) diff --git a/segmentation_models_pytorch/encoders/efficientnet.py b/segmentation_models_pytorch/encoders/efficientnet.py index 96edb4fe..f51635ff 100644 --- a/segmentation_models_pytorch/encoders/efficientnet.py +++ b/segmentation_models_pytorch/encoders/efficientnet.py @@ -26,10 +26,8 @@ import torch from typing import List, Dict, Sequence -from efficientnet_pytorch import EfficientNet -from efficientnet_pytorch.utils import url_map, url_map_advprop, get_model_params - from ._base import EncoderMixin +from ._efficientnet import EfficientNet, url_map, url_map_advprop, get_model_params class EfficientNetEncoder(EfficientNet, EncoderMixin): From 29f0bbf9bf43728779a9cd928742899e4049d69d Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Wed, 15 Jan 2025 20:31:59 +0100 Subject: [PATCH 2/4] Remove classmethods, PyTorch compatibility layer --- .../encoders/_efficientnet.py | 135 +----------------- 1 file changed, 2 insertions(+), 133 deletions(-) diff --git a/segmentation_models_pytorch/encoders/_efficientnet.py b/segmentation_models_pytorch/encoders/_efficientnet.py index 860b17bb..8e710c95 100644 --- a/segmentation_models_pytorch/encoders/_efficientnet.py +++ b/segmentation_models_pytorch/encoders/_efficientnet.py @@ -171,12 +171,11 @@ def set_swish(self, memory_efficient=True): Args: memory_efficient (bool): Whether to use memory-efficient version of swish. """ - self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + self._swish = MemoryEfficientSwish() if memory_efficient else nn.SiLU() class EfficientNet(nn.Module): """EfficientNet model. - Most easily loaded with the .from_name or .from_pretrained methods. Args: blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks. @@ -275,7 +274,7 @@ def set_swish(self, memory_efficient=True): Args: memory_efficient (bool): Whether to use memory-efficient version of swish. """ - self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + self._swish = MemoryEfficientSwish() if memory_efficient else nn.SiLU() for block in self._blocks: block.set_swish(memory_efficient) @@ -375,127 +374,6 @@ def forward(self, inputs): x = self._fc(x) return x - @classmethod - def from_name(cls, model_name, in_channels=3, **override_params): - """Create an efficientnet model according to name. - - Args: - model_name (str): Name for efficientnet. - in_channels (int): Input data's channel number. - override_params (other key word params): - Params to override model's global_params. - Optional key: - 'width_coefficient', 'depth_coefficient', - 'image_size', 'dropout_rate', - 'num_classes', 'batch_norm_momentum', - 'batch_norm_epsilon', 'drop_connect_rate', - 'depth_divisor', 'min_depth' - - Returns: - An efficientnet model. - """ - cls._check_model_name_is_valid(model_name) - blocks_args, global_params = get_model_params(model_name, override_params) - model = cls(blocks_args, global_params) - model._change_in_channels(in_channels) - return model - - @classmethod - def from_pretrained( - cls, - model_name, - weights_path=None, - advprop=False, - in_channels=3, - num_classes=1000, - **override_params, - ): - """Create an efficientnet model according to name. - - Args: - model_name (str): Name for efficientnet. - weights_path (None or str): - str: path to pretrained weights file on the local disk. - None: use pretrained weights downloaded from the Internet. - advprop (bool): - Whether to load pretrained weights - trained with advprop (valid when weights_path is None). - in_channels (int): Input data's channel number. - num_classes (int): - Number of categories for classification. - It controls the output size for final linear layer. - override_params (other key word params): - Params to override model's global_params. - Optional key: - 'width_coefficient', 'depth_coefficient', - 'image_size', 'dropout_rate', - 'batch_norm_momentum', - 'batch_norm_epsilon', 'drop_connect_rate', - 'depth_divisor', 'min_depth' - - Returns: - A pretrained efficientnet model. - """ - model = cls.from_name(model_name, num_classes=num_classes, **override_params) - load_pretrained_weights( - model, - model_name, - weights_path=weights_path, - load_fc=(num_classes == 1000), - advprop=advprop, - ) - model._change_in_channels(in_channels) - return model - - @classmethod - def get_image_size(cls, model_name): - """Get the input image size for a given efficientnet model. - - Args: - model_name (str): Name for efficientnet. - - Returns: - Input image size (resolution). - """ - cls._check_model_name_is_valid(model_name) - _, _, res, _ = efficientnet_params(model_name) - return res - - @classmethod - def _check_model_name_is_valid(cls, model_name): - """Validates model name. - - Args: - model_name (str): Name for efficientnet. - - Returns: - bool: Is a valid name or not. - """ - if model_name not in VALID_MODELS: - raise ValueError("model_name should be one of: " + ", ".join(VALID_MODELS)) - - def _change_in_channels(self, in_channels): - """Adjust model's first convolution layer to in_channels, if in_channels not equals 3. - - Args: - in_channels (int): Input data's channel number. - """ - if in_channels != 3: - Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size) - out_channels = round_filters(32, self._global_params) - self._conv_stem = Conv2d( - in_channels, out_channels, kernel_size=3, stride=2, bias=False - ) - - -"""utils.py - Helper functions for building the model and for loading model parameters. - These helper functions are built to mirror those in the official TensorFlow implementation. -""" - -# Author: lukemelas (github username) -# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch -# With adjustments and added comments by workingcoder (github username). - ################################################################################ # Help functions for model architecture @@ -553,15 +431,6 @@ def _change_in_channels(self, in_channels): GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) -# Swish activation function -if hasattr(nn, "SiLU"): - Swish = nn.SiLU -else: - # For compatibility with old PyTorch versions - class Swish(nn.Module): - def forward(self, x): - return x * torch.sigmoid(x) - # A memory-efficient implementation of Swish function class SwishImplementation(torch.autograd.Function): From 002ae3e07bec628d80efee42fdea96786448c604 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Thu, 16 Jan 2025 11:56:19 +0100 Subject: [PATCH 3/4] Standard Swish implementation --- .../encoders/_efficientnet.py | 44 +------------------ 1 file changed, 2 insertions(+), 42 deletions(-) diff --git a/segmentation_models_pytorch/encoders/_efficientnet.py b/segmentation_models_pytorch/encoders/_efficientnet.py index 8e710c95..84496e54 100644 --- a/segmentation_models_pytorch/encoders/_efficientnet.py +++ b/segmentation_models_pytorch/encoders/_efficientnet.py @@ -113,7 +113,7 @@ def __init__(self, block_args, global_params, image_size=None): self._bn2 = nn.BatchNorm2d( num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps ) - self._swish = MemoryEfficientSwish() + self._swish = nn.SiLU() def forward(self, inputs, drop_connect_rate=None): """MBConvBlock's forward function. @@ -165,14 +165,6 @@ def forward(self, inputs, drop_connect_rate=None): x = x + inputs # skip connection return x - def set_swish(self, memory_efficient=True): - """Sets swish function as memory efficient (for training) or standard (for export). - - Args: - memory_efficient (bool): Whether to use memory-efficient version of swish. - """ - self._swish = MemoryEfficientSwish() if memory_efficient else nn.SiLU() - class EfficientNet(nn.Module): """EfficientNet model. @@ -265,18 +257,7 @@ def __init__(self, blocks_args=None, global_params=None): self._dropout = nn.Dropout(self._global_params.dropout_rate) self._fc = nn.Linear(out_channels, self._global_params.num_classes) - # set activation to memory efficient swish by default - self._swish = MemoryEfficientSwish() - - def set_swish(self, memory_efficient=True): - """Sets swish function as memory efficient (for training) or standard (for export). - - Args: - memory_efficient (bool): Whether to use memory-efficient version of swish. - """ - self._swish = MemoryEfficientSwish() if memory_efficient else nn.SiLU() - for block in self._blocks: - block.set_swish(memory_efficient) + self._swish = nn.SiLU() def extract_endpoints(self, inputs): """Use convolution layer to extract features @@ -380,7 +361,6 @@ def forward(self, inputs): ################################################################################ # GlobalParams and BlockArgs: Two namedtuples -# Swish and MemoryEfficientSwish: Two implementations of the method # round_filters and round_repeats: # Functions to calculate params for scaling model width and depth ! ! ! # get_width_and_height_from_size and calculate_output_image_size @@ -432,26 +412,6 @@ def forward(self, inputs): BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) -# A memory-efficient implementation of Swish function -class SwishImplementation(torch.autograd.Function): - @staticmethod - def forward(ctx, i): - result = i * torch.sigmoid(i) - ctx.save_for_backward(i) - return result - - @staticmethod - def backward(ctx, grad_output): - i = ctx.saved_tensors[0] - sigmoid_i = torch.sigmoid(i) - return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) - - -class MemoryEfficientSwish(nn.Module): - def forward(self, x): - return SwishImplementation.apply(x) - - def round_filters(filters, global_params): """Calculate and round number of filters based on width multiplier. Use width_coefficient, depth_divisor and min_depth of global_params. From 549289c1315a92ee592eed96aa624ca3bd230937 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Thu, 16 Jan 2025 12:35:14 +0100 Subject: [PATCH 4/4] Remove additional cruft --- .../encoders/_efficientnet.py | 99 ------------------- 1 file changed, 99 deletions(-) diff --git a/segmentation_models_pytorch/encoders/_efficientnet.py b/segmentation_models_pytorch/encoders/_efficientnet.py index 84496e54..cb215201 100644 --- a/segmentation_models_pytorch/encoders/_efficientnet.py +++ b/segmentation_models_pytorch/encoders/_efficientnet.py @@ -16,21 +16,6 @@ from torch.utils import model_zoo -VALID_MODELS = ( - "efficientnet-b0", - "efficientnet-b1", - "efficientnet-b2", - "efficientnet-b3", - "efficientnet-b4", - "efficientnet-b5", - "efficientnet-b6", - "efficientnet-b7", - "efficientnet-b8", - # Support the construction of 'efficientnet-l2' without pretrained weights - "efficientnet-l2", -) - - class MBConvBlock(nn.Module): """Mobile Inverted Residual Bottleneck Block. @@ -772,7 +757,6 @@ def forward(self, x): # get_model_params and efficientnet: # Functions to get BlockArgs and GlobalParams for efficientnet # url_map and url_map_advprop: Dicts of url_map for pretrained weights -# load_pretrained_weights: A function to load pretrained weights class BlockDecoder(object): @@ -817,30 +801,6 @@ def _decode_block_string(block_string): id_skip=("noskip" not in block_string), ) - @staticmethod - def _encode_block_string(block): - """Encode a block to a string. - - Args: - block (namedtuple): A BlockArgs type argument. - - Returns: - block_string: A String form of BlockArgs. - """ - args = [ - "r%d" % block.num_repeat, - "k%d" % block.kernel_size, - "s%d%d" % (block.strides[0], block.strides[1]), - "e%s" % block.expand_ratio, - "i%d" % block.input_filters, - "o%d" % block.output_filters, - ] - if 0 < block.se_ratio <= 1: - args.append("se%s" % block.se_ratio) - if block.id_skip is False: - args.append("noskip") - return "_".join(args) - @staticmethod def decode(string_list): """Decode a list of string notations to specify blocks inside the network. @@ -857,21 +817,6 @@ def decode(string_list): blocks_args.append(BlockDecoder._decode_block_string(block_string)) return blocks_args - @staticmethod - def encode(blocks_args): - """Encode a list of BlockArgs to a list of strings. - - Args: - blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args. - - Returns: - block_strings: A list of strings, each string is a notation of block. - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - def efficientnet_params(model_name): """Map EfficientNet model name to parameter coefficients. @@ -1005,47 +950,3 @@ def get_model_params(model_name, override_params): "efficientnet-b7": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth", "efficientnet-b8": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth", } - -# TODO: add the petrained weights url map of 'efficientnet-l2' - - -def load_pretrained_weights( - model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True -): - """Loads pretrained weights from weights path or download using url. - - Args: - model (Module): The whole model of efficientnet. - model_name (str): Model name of efficientnet. - weights_path (None or str): - str: path to pretrained weights file on the local disk. - None: use pretrained weights downloaded from the Internet. - load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model. - advprop (bool): Whether to load pretrained weights - trained with advprop (valid when weights_path is None). - """ - if isinstance(weights_path, str): - state_dict = torch.load(weights_path) - else: - # AutoAugment or Advprop (different preprocessing) - url_map_ = url_map_advprop if advprop else url_map - state_dict = model_zoo.load_url(url_map_[model_name]) - - if load_fc: - ret = model.load_state_dict(state_dict, strict=False) - assert not ret.missing_keys, ( - "Missing keys when loading pretrained weights: {}".format(ret.missing_keys) - ) - else: - state_dict.pop("_fc.weight") - state_dict.pop("_fc.bias") - ret = model.load_state_dict(state_dict, strict=False) - assert set(ret.missing_keys) == set(["_fc.weight", "_fc.bias"]), ( - "Missing keys when loading pretrained weights: {}".format(ret.missing_keys) - ) - assert not ret.unexpected_keys, ( - "Missing keys when loading pretrained weights: {}".format(ret.unexpected_keys) - ) - - if verbose: - print("Loaded pretrained weights for {}".format(model_name))