diff --git a/pyproject.toml b/pyproject.toml index 645ec369..492f7ef9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ 'numpy>=1.19.3', 'pillow>=8', 'pretrainedmodels>=0.7.1', + 'safetensors>=0.3.1', 'six>=1.5', 'timm>=0.9', 'torch>=1.8', diff --git a/requirements/minimum.old b/requirements/minimum.old index 40bdc6ce..1adb97f5 100644 --- a/requirements/minimum.old +++ b/requirements/minimum.old @@ -2,6 +2,7 @@ huggingface-hub==0.24.0 numpy==1.19.3 pillow==8.0.0 pretrainedmodels==0.7.1 +safetensors==0.3.1 six==1.5.0 timm==0.9.0 torch==1.9.0 diff --git a/requirements/required.txt b/requirements/required.txt index 220e2ab9..6864e1f6 100644 --- a/requirements/required.txt +++ b/requirements/required.txt @@ -2,6 +2,7 @@ huggingface_hub==0.27.1 numpy==2.2.1 pillow==11.1.0 pretrainedmodels==0.7.4 +safetensors==0.5.2 six==1.17.0 timm==1.0.13 torch==2.5.1 diff --git a/segmentation_models_pytorch/encoders/__init__.py b/segmentation_models_pytorch/encoders/__init__.py index 3d71f49a..7c74ec61 100644 --- a/segmentation_models_pytorch/encoders/__init__.py +++ b/segmentation_models_pytorch/encoders/__init__.py @@ -1,8 +1,12 @@ +import json import timm import copy import warnings import functools -import torch.utils.model_zoo as model_zoo +from torch.utils.model_zoo import load_url +from huggingface_hub import hf_hub_download +from safetensors.torch import load_file + from .resnet import resnet_encoders from .dpn import dpn_encoders @@ -22,6 +26,7 @@ from .timm_universal import TimmUniversalEncoder from ._preprocessing import preprocess_input +from ._legacy_pretrained_settings import pretrained_settings __all__ = [ "encoders", @@ -101,15 +106,43 @@ def get_encoder(name, in_channels=3, depth=5, weights=None, output_stride=32, ** encoder = EncoderClass(**params) if weights is not None: - try: - settings = encoders[name]["pretrained_settings"][weights] - except KeyError: + if weights not in encoders[name]["pretrained_settings"]: + available_weights = list(encoders[name]["pretrained_settings"].keys()) raise KeyError( - "Wrong pretrained weights `{}` for encoder `{}`. Available options are: {}".format( - weights, name, list(encoders[name]["pretrained_settings"].keys()) - ) + f"Wrong pretrained weights `{weights}` for encoder `{name}`. " + f"Available options are: {available_weights}" + ) + + settings = encoders[name]["pretrained_settings"][weights] + repo_id = settings["repo_id"] + revision = settings["revision"] + + # First, try to load from HF-Hub, but as far as I know not all countries have + # access to the Hub (e.g. China), so we try to load from the original url if + # the first attempt fails. + weights_path = None + try: + hf_hub_download(repo_id, filename="config.json", revision=revision) + weights_path = hf_hub_download( + repo_id, filename="model.safetensors", revision=revision ) - encoder.load_state_dict(model_zoo.load_url(settings["url"])) + except Exception as e: + if name in pretrained_settings and weights in pretrained_settings[name]: + message = ( + f"Error loading {name} `{weights}` weights from Hugging Face Hub, " + "trying loading from original url..." + ) + warnings.warn(message, UserWarning) + url = pretrained_settings[name][weights]["url"] + state_dict = load_url(url, map_location="cpu") + else: + raise e + + if weights_path is not None: + state_dict = load_file(weights_path, device="cpu") + + # Load model weights + encoder.load_state_dict(state_dict) encoder.set_in_channels(in_channels, pretrained=weights is not None) if output_stride != 32: @@ -136,7 +169,25 @@ def get_preprocessing_params(encoder_name, pretrained="imagenet"): raise ValueError( "Available pretrained options {}".format(all_settings.keys()) ) - settings = all_settings[pretrained] + + repo_id = all_settings[pretrained]["repo_id"] + revision = all_settings[pretrained]["revision"] + + # Load config and model + try: + config_path = hf_hub_download( + repo_id, filename="config.json", revision=revision + ) + with open(config_path, "r") as f: + settings = json.load(f) + except Exception as e: + if ( + encoder_name in pretrained_settings + and pretrained in pretrained_settings[encoder_name] + ): + settings = pretrained_settings[encoder_name][pretrained] + else: + raise e formatted_settings = {} formatted_settings["input_space"] = settings.get("input_space", "RGB") diff --git a/segmentation_models_pytorch/encoders/_efficientnet.py b/segmentation_models_pytorch/encoders/_efficientnet.py index cb215201..dcc4e268 100644 --- a/segmentation_models_pytorch/encoders/_efficientnet.py +++ b/segmentation_models_pytorch/encoders/_efficientnet.py @@ -13,7 +13,6 @@ import math import collections from functools import partial -from torch.utils import model_zoo class MBConvBlock(nn.Module): diff --git a/segmentation_models_pytorch/encoders/_legacy_pretrained_settings.py b/segmentation_models_pytorch/encoders/_legacy_pretrained_settings.py new file mode 100644 index 00000000..21f5691e --- /dev/null +++ b/segmentation_models_pytorch/encoders/_legacy_pretrained_settings.py @@ -0,0 +1,1062 @@ +pretrained_settings = { + "resnet18": { + "imagenet": { + "url": "https://download.pytorch.org/models/resnet18-5c106cde.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "ssl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet18-d92f0530.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "swsl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet18-118f1556.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + }, + "resnet34": { + "imagenet": { + "url": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "resnet50": { + "imagenet": { + "url": "https://download.pytorch.org/models/resnet50-19c8e357.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "ssl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet50-08389792.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "swsl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet50-16a12f1b.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + }, + "resnet101": { + "imagenet": { + "url": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "resnet152": { + "imagenet": { + "url": "https://download.pytorch.org/models/resnet152-b121ed2d.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "resnext50_32x4d": { + "imagenet": { + "url": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "ssl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext50_32x4-ddb3e555.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "swsl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext50_32x4-72679e44.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + }, + "resnext101_32x4d": { + "ssl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x4-dc43570a.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "swsl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x4-3f87e46b.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + }, + "resnext101_32x8d": { + "imagenet": { + "url": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "instagram": { + "url": "https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "ssl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x8-2cfe2f8b.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "swsl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + }, + "resnext101_32x16d": { + "instagram": { + "url": "https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "ssl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x16-15fffa57.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + "swsl": { + "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + }, + }, + "resnext101_32x32d": { + "instagram": { + "url": "https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "resnext101_32x48d": { + "instagram": { + "url": "https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "dpn68": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn68-4af7d88d2.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.48627450980392156, 0.4588235294117647, 0.40784313725490196], + "std": [0.23482446870963955, 0.23482446870963955, 0.23482446870963955], + "num_classes": 1000, + } + }, + "dpn68b": { + "imagenet+5k": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn68b_extra-363ab9c19.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.48627450980392156, 0.4588235294117647, 0.40784313725490196], + "std": [0.23482446870963955, 0.23482446870963955, 0.23482446870963955], + "num_classes": 1000, + } + }, + "dpn92": { + "imagenet+5k": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn92_extra-fda993c95.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.48627450980392156, 0.4588235294117647, 0.40784313725490196], + "std": [0.23482446870963955, 0.23482446870963955, 0.23482446870963955], + "num_classes": 1000, + } + }, + "dpn98": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn98-722954780.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.48627450980392156, 0.4588235294117647, 0.40784313725490196], + "std": [0.23482446870963955, 0.23482446870963955, 0.23482446870963955], + "num_classes": 1000, + } + }, + "dpn107": { + "imagenet+5k": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn107_extra-b7f9f4cc9.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.48627450980392156, 0.4588235294117647, 0.40784313725490196], + "std": [0.23482446870963955, 0.23482446870963955, 0.23482446870963955], + "num_classes": 1000, + } + }, + "dpn131": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn131-7af84be88.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.48627450980392156, 0.4588235294117647, 0.40784313725490196], + "std": [0.23482446870963955, 0.23482446870963955, 0.23482446870963955], + "num_classes": 1000, + } + }, + "vgg11": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg11-bbd30ac9.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "vgg11_bn": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg11_bn-6002323d.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "vgg13": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg13-c768596a.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "vgg13_bn": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "vgg16": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg16-397923af.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "vgg16_bn": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "vgg19": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "vgg19_bn": { + "imagenet": { + "url": "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "senet154": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "se_resnet50": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "se_resnet101": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "se_resnet152": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "se_resnext50_32x4d": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "se_resnext101_32x4d": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "densenet121": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet121-fbdb23505.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "densenet169": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet169-f470b90a4.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "densenet201": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet201-5750cbb1e.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "densenet161": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet161-347e6b360.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "inceptionresnetv2": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth", + "input_space": "RGB", + "input_size": [3, 299, 299], + "input_range": [0, 1], + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "num_classes": 1000, + }, + "imagenet+background": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth", + "input_space": "RGB", + "input_size": [3, 299, 299], + "input_range": [0, 1], + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "num_classes": 1001, + }, + }, + "inceptionv4": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth", + "input_space": "RGB", + "input_size": [3, 299, 299], + "input_range": [0, 1], + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "num_classes": 1000, + }, + "imagenet+background": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth", + "input_space": "RGB", + "input_size": [3, 299, 299], + "input_range": [0, 1], + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "num_classes": 1001, + }, + }, + "efficientnet-b0": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "efficientnet-b1": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "efficientnet-b2": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "efficientnet-b3": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "efficientnet-b4": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "efficientnet-b5": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "efficientnet-b6": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "efficientnet-b7": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth", + "input_space": "RGB", + "input_range": [0, 1], + }, + }, + "mobilenet_v2": { + "imagenet": { + "url": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "xception": { + "imagenet": { + "url": "http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth", + "input_space": "RGB", + "input_size": [3, 299, 299], + "input_range": [0, 1], + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "num_classes": 1000, + "scale": 0.8975, + } + }, + "timm-efficientnet-b0": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0-0af12548.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ap-f262efe1.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ns-c0e6a31c.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b1": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1-5c1377c4.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ap-44ef0a3d.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ns-99dd0c41.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b2": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2-e393ef04.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ap-2f8e7636.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ns-00306e48.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b3": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3-e3bd6955.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ap-aad25bdd.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ns-9d44bf68.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b4": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4-74ee3bed.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ap-dedb23e6.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ns-d6313a46.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b5": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5-c6949ce9.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ap-9e82fae8.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ns-6f26d0cf.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b6": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_aa-80ba17e4.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ap-4ffb161f.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ns-51548356.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b7": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_aa-076e3472.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ap-ddb28fec.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-b8": { + "imagenet": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ra-572d5dd9.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "advprop": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-efficientnet-l2": { + "noisy-student": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns-df73bb44.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + "noisy-student-475": { + "mean": (0.485, 0.456, 0.406), + "std": (0.229, 0.224, 0.225), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns_475-bebbd00a.pth", + "input_range": (0, 1), + "input_space": "RGB", + }, + }, + "timm-tf_efficientnet_lite0": { + "imagenet": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite0-0aa007d2.pth", + "input_range": (0, 1), + "input_space": "RGB", + } + }, + "timm-tf_efficientnet_lite1": { + "imagenet": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite1-bde8b488.pth", + "input_range": (0, 1), + "input_space": "RGB", + } + }, + "timm-tf_efficientnet_lite2": { + "imagenet": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite2-dcccb7df.pth", + "input_range": (0, 1), + "input_space": "RGB", + } + }, + "timm-tf_efficientnet_lite3": { + "imagenet": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite3-b733e338.pth", + "input_range": (0, 1), + "input_space": "RGB", + } + }, + "timm-tf_efficientnet_lite4": { + "imagenet": { + "mean": (0.5, 0.5, 0.5), + "std": (0.5, 0.5, 0.5), + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite4-741542c3.pth", + "input_range": (0, 1), + "input_space": "RGB", + } + }, + "timm-skresnet18": { + "imagenet": { + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet18_ra-4eec2804.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "timm-skresnet34": { + "imagenet": { + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet34_ra-bdc0ccde.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "timm-skresnext50_32x4d": { + "imagenet": { + "url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnext50_ra-f40e40bf.pth", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + }, + "mit_b0": { + "imagenet": { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b0.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + }, + "mit_b1": { + "imagenet": { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b1.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + }, + "mit_b2": { + "imagenet": { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b2.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + }, + "mit_b3": { + "imagenet": { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b3.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + }, + "mit_b4": { + "imagenet": { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b4.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + }, + "mit_b5": { + "imagenet": { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b5.pth", + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + }, + "mobileone_s0": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s0_unfused.pth.tar", + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "mobileone_s1": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s1_unfused.pth.tar", + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "mobileone_s2": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s2_unfused.pth.tar", + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "mobileone_s3": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s3_unfused.pth.tar", + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "mobileone_s4": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s4_unfused.pth.tar", + "input_space": "RGB", + "input_range": [0, 1], + } + }, +} diff --git a/segmentation_models_pytorch/encoders/densenet.py b/segmentation_models_pytorch/encoders/densenet.py index 3ce9b3d0..ad0e0c25 100644 --- a/segmentation_models_pytorch/encoders/densenet.py +++ b/segmentation_models_pytorch/encoders/densenet.py @@ -110,92 +110,65 @@ def load_state_dict(self, state_dict): super().load_state_dict(state_dict) -pretrained_settings = { - "densenet121": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet121-fbdb23505.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "densenet169": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet169-f470b90a4.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "densenet201": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet201-5750cbb1e.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "densenet161": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/densenet161-347e6b360.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, -} - densenet_encoders = { "densenet121": { "encoder": DenseNetEncoder, - "pretrained_settings": pretrained_settings["densenet121"], "params": { "out_channels": [3, 64, 256, 512, 1024, 1024], "num_init_features": 64, "growth_rate": 32, "block_config": (6, 12, 24, 16), }, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/densenet121.imagenet", + "revision": "a17c96896a265b61338f66f61d3887b24f61995a", + } + }, }, "densenet169": { "encoder": DenseNetEncoder, - "pretrained_settings": pretrained_settings["densenet169"], "params": { "out_channels": [3, 64, 256, 512, 1280, 1664], "num_init_features": 64, "growth_rate": 32, "block_config": (6, 12, 32, 32), }, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/densenet169.imagenet", + "revision": "8facfba9fc72f7750879dac9ac6ceb3ab990de8d", + } + }, }, "densenet201": { "encoder": DenseNetEncoder, - "pretrained_settings": pretrained_settings["densenet201"], "params": { "out_channels": [3, 64, 256, 512, 1792, 1920], "num_init_features": 64, "growth_rate": 32, "block_config": (6, 12, 48, 32), }, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/densenet201.imagenet", + "revision": "ed5deb355d71659391d46fae5e7587460fbb5f84", + } + }, }, "densenet161": { "encoder": DenseNetEncoder, - "pretrained_settings": pretrained_settings["densenet161"], "params": { "out_channels": [3, 96, 384, 768, 2112, 2208], "num_init_features": 96, "growth_rate": 48, "block_config": (6, 12, 36, 24), }, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/densenet161.imagenet", + "revision": "9afe0fec51ab2a627141769d97d6f83756d78446", + } + }, }, } diff --git a/segmentation_models_pytorch/encoders/dpn.py b/segmentation_models_pytorch/encoders/dpn.py index 1034540d..4fe84328 100644 --- a/segmentation_models_pytorch/encoders/dpn.py +++ b/segmentation_models_pytorch/encoders/dpn.py @@ -101,79 +101,15 @@ def load_state_dict(self, state_dict, **kwargs): super().load_state_dict(state_dict, **kwargs) -pretrained_settings = { - "dpn68": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn68-4af7d88d2.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [124 / 255, 117 / 255, 104 / 255], - "std": [1 / (0.0167 * 255)] * 3, - "num_classes": 1000, - } - }, - "dpn68b": { - "imagenet+5k": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn68b_extra-363ab9c19.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [124 / 255, 117 / 255, 104 / 255], - "std": [1 / (0.0167 * 255)] * 3, - "num_classes": 1000, - } - }, - "dpn92": { - "imagenet+5k": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn92_extra-fda993c95.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [124 / 255, 117 / 255, 104 / 255], - "std": [1 / (0.0167 * 255)] * 3, - "num_classes": 1000, - } - }, - "dpn98": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn98-722954780.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [124 / 255, 117 / 255, 104 / 255], - "std": [1 / (0.0167 * 255)] * 3, - "num_classes": 1000, - } - }, - "dpn131": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn131-7af84be88.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [124 / 255, 117 / 255, 104 / 255], - "std": [1 / (0.0167 * 255)] * 3, - "num_classes": 1000, - } - }, - "dpn107": { - "imagenet+5k": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/dpn107_extra-b7f9f4cc9.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [124 / 255, 117 / 255, 104 / 255], - "std": [1 / (0.0167 * 255)] * 3, - "num_classes": 1000, - } - }, -} - dpn_encoders = { "dpn68": { "encoder": DPNEncoder, - "pretrained_settings": pretrained_settings["dpn68"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/dpn68.imagenet", + "revision": "c209aefdeae6bc93937556629e974b44d4e58535", + } + }, "params": { "stage_idxs": [4, 8, 20, 24], "out_channels": [3, 10, 144, 320, 704, 832], @@ -189,7 +125,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "dpn68b": { "encoder": DPNEncoder, - "pretrained_settings": pretrained_settings["dpn68b"], + "pretrained_settings": { + "imagenet+5k": { + "repo_id": "smp-hub/dpn68b.imagenet-5k", + "revision": "6c6615e77688e390ae0eaa81e26821fbd83cee4b", + } + }, "params": { "stage_idxs": [4, 8, 20, 24], "out_channels": [3, 10, 144, 320, 704, 832], @@ -206,7 +147,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "dpn92": { "encoder": DPNEncoder, - "pretrained_settings": pretrained_settings["dpn92"], + "pretrained_settings": { + "imagenet+5k": { + "repo_id": "smp-hub/dpn92.imagenet-5k", + "revision": "d231f51ce4ad2c84ed5fcaf4ef0cfece6814a526", + } + }, "params": { "stage_idxs": [4, 8, 28, 32], "out_channels": [3, 64, 336, 704, 1552, 2688], @@ -221,7 +167,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "dpn98": { "encoder": DPNEncoder, - "pretrained_settings": pretrained_settings["dpn98"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/dpn98.imagenet", + "revision": "b2836c86216c1ddce980d832f7deaa4ca22babd3", + } + }, "params": { "stage_idxs": [4, 10, 30, 34], "out_channels": [3, 96, 336, 768, 1728, 2688], @@ -236,7 +187,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "dpn107": { "encoder": DPNEncoder, - "pretrained_settings": pretrained_settings["dpn107"], + "pretrained_settings": { + "imagenet+5k": { + "repo_id": "smp-hub/dpn107.imagenet-5k", + "revision": "dab4cd6b8b79de3db970f2dbff85359a8847db05", + } + }, "params": { "stage_idxs": [5, 13, 33, 37], "out_channels": [3, 128, 376, 1152, 2432, 2688], @@ -251,7 +207,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "dpn131": { "encoder": DPNEncoder, - "pretrained_settings": pretrained_settings["dpn131"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/dpn131.imagenet", + "revision": "04bbb9f415ca2bb59f3d8227857967b74698515e", + } + }, "params": { "stage_idxs": [5, 13, 41, 45], "out_channels": [3, 128, 352, 832, 1984, 2688], diff --git a/segmentation_models_pytorch/encoders/efficientnet.py b/segmentation_models_pytorch/encoders/efficientnet.py index f51635ff..3ea9f1d7 100644 --- a/segmentation_models_pytorch/encoders/efficientnet.py +++ b/segmentation_models_pytorch/encoders/efficientnet.py @@ -27,7 +27,7 @@ from typing import List, Dict, Sequence from ._base import EncoderMixin -from ._efficientnet import EfficientNet, url_map, url_map_advprop, get_model_params +from ._efficientnet import EfficientNet, get_model_params class EfficientNetEncoder(EfficientNet, EncoderMixin): @@ -107,30 +107,19 @@ def load_state_dict(self, state_dict, **kwargs): super().load_state_dict(state_dict, **kwargs) -def _get_pretrained_settings(encoder): - pretrained_settings = { - "imagenet": { - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "url": url_map[encoder], - "input_space": "RGB", - "input_range": [0, 1], - }, - "advprop": { - "mean": [0.5, 0.5, 0.5], - "std": [0.5, 0.5, 0.5], - "url": url_map_advprop[encoder], - "input_space": "RGB", - "input_range": [0, 1], - }, - } - return pretrained_settings - - efficient_net_encoders = { "efficientnet-b0": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b0"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b0.imagenet", + "revision": "1bbe7ecc1d5ea1d2058de1a2db063b8701aff314", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b0.advprop", + "revision": "29043c08140d9c6ee7de1468d55923f2b06bcec2", + }, + }, "params": { "out_channels": [3, 32, 24, 40, 112, 320], "stage_idxs": [3, 5, 9, 16], @@ -139,7 +128,16 @@ def _get_pretrained_settings(encoder): }, "efficientnet-b1": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b1"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b1.imagenet", + "revision": "5d637466a5215de300a8ccb13a39357df2df2bf4", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b1.advprop", + "revision": "2e518b8b0955bbab467f50525578dab6b6086afc", + }, + }, "params": { "out_channels": [3, 32, 24, 40, 112, 320], "stage_idxs": [5, 8, 16, 23], @@ -148,7 +146,16 @@ def _get_pretrained_settings(encoder): }, "efficientnet-b2": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b2"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b2.imagenet", + "revision": "a96d4f0295ffbae18ebba173bf7f3c0c8f21990e", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b2.advprop", + "revision": "be788c20dfb0bbe83b4c439f9cfe0dd937c0783e", + }, + }, "params": { "out_channels": [3, 32, 24, 48, 120, 352], "stage_idxs": [5, 8, 16, 23], @@ -157,7 +164,16 @@ def _get_pretrained_settings(encoder): }, "efficientnet-b3": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b3"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b3.imagenet", + "revision": "074c54a6c473e0d294690d49cedb6cf463e7127d", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b3.advprop", + "revision": "9ccc166d87bd9c08d6bed4477638c7f4bb3eec78", + }, + }, "params": { "out_channels": [3, 40, 32, 48, 136, 384], "stage_idxs": [5, 8, 18, 26], @@ -166,7 +182,16 @@ def _get_pretrained_settings(encoder): }, "efficientnet-b4": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b4"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b4.imagenet", + "revision": "05cd5dde5dab658f00c463f9b9aa0ced76784f40", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b4.advprop", + "revision": "f04caa809ea4eb08ee9e7fd555f5514ebe2a9ef5", + }, + }, "params": { "out_channels": [3, 48, 32, 56, 160, 448], "stage_idxs": [6, 10, 22, 32], @@ -175,7 +200,16 @@ def _get_pretrained_settings(encoder): }, "efficientnet-b5": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b5"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b5.imagenet", + "revision": "69f4d28460a4e421b7860bc26ee7d832e03e01ca", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b5.advprop", + "revision": "dabe78fc8ab7ce93ddc2bb156b01db227caede88", + }, + }, "params": { "out_channels": [3, 48, 40, 64, 176, 512], "stage_idxs": [8, 13, 27, 39], @@ -184,7 +218,16 @@ def _get_pretrained_settings(encoder): }, "efficientnet-b6": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b6"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b6.imagenet", + "revision": "8570752016f7c62ae149cffa058550fe44e21c8b", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b6.advprop", + "revision": "c2dbb4d1359151165ec7b96cfe54a9cac2142a31", + }, + }, "params": { "out_channels": [3, 56, 40, 72, 200, 576], "stage_idxs": [9, 15, 31, 45], @@ -193,7 +236,16 @@ def _get_pretrained_settings(encoder): }, "efficientnet-b7": { "encoder": EfficientNetEncoder, - "pretrained_settings": _get_pretrained_settings("efficientnet-b7"), + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/efficientnet-b7.imagenet", + "revision": "5a5dbe687d612ebc3dca248274fd1191111deda6", + }, + "advprop": { + "repo_id": "smp-hub/efficientnet-b7.advprop", + "revision": "ce33edb4e80c0cde268f098ae2299e23f615577d", + }, + }, "params": { "out_channels": [3, 64, 48, 80, 224, 640], "stage_idxs": [11, 18, 38, 55], diff --git a/segmentation_models_pytorch/encoders/inceptionresnetv2.py b/segmentation_models_pytorch/encoders/inceptionresnetv2.py index 3ac662e2..15bf6502 100644 --- a/segmentation_models_pytorch/encoders/inceptionresnetv2.py +++ b/segmentation_models_pytorch/encoders/inceptionresnetv2.py @@ -118,22 +118,12 @@ def load_state_dict(self, state_dict, **kwargs): "encoder": InceptionResNetV2Encoder, "pretrained_settings": { "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth", - "input_space": "RGB", - "input_size": [3, 299, 299], - "input_range": [0, 1], - "mean": [0.5, 0.5, 0.5], - "std": [0.5, 0.5, 0.5], - "num_classes": 1000, + "repo_id": "smp-hub/inceptionresnetv2.imagenet", + "revision": "120c5afdbb80a1c989db0a7423ebb7a9db9b1e6c", }, "imagenet+background": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth", - "input_space": "RGB", - "input_size": [3, 299, 299], - "input_range": [0, 1], - "mean": [0.5, 0.5, 0.5], - "std": [0.5, 0.5, 0.5], - "num_classes": 1001, + "repo_id": "smp-hub/inceptionresnetv2.imagenet-background", + "revision": "3ecf3491658dc0f6a76d69c9d1cb36511b1ee56c", }, }, "params": {"out_channels": [3, 64, 192, 320, 1088, 1536], "num_classes": 1000}, diff --git a/segmentation_models_pytorch/encoders/inceptionv4.py b/segmentation_models_pytorch/encoders/inceptionv4.py index c5b79b02..12a7cc1b 100644 --- a/segmentation_models_pytorch/encoders/inceptionv4.py +++ b/segmentation_models_pytorch/encoders/inceptionv4.py @@ -99,22 +99,12 @@ def load_state_dict(self, state_dict, **kwargs): "encoder": InceptionV4Encoder, "pretrained_settings": { "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth", - "input_space": "RGB", - "input_size": [3, 299, 299], - "input_range": [0, 1], - "mean": [0.5, 0.5, 0.5], - "std": [0.5, 0.5, 0.5], - "num_classes": 1000, + "repo_id": "smp-hub/inceptionv4.imagenet", + "revision": "918fb54f07811d82a4ecde3a51156041d0facba9", }, "imagenet+background": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth", - "input_space": "RGB", - "input_size": [3, 299, 299], - "input_range": [0, 1], - "mean": [0.5, 0.5, 0.5], - "std": [0.5, 0.5, 0.5], - "num_classes": 1001, + "repo_id": "smp-hub/inceptionv4.imagenet-background", + "revision": "8c2a48e20d2709ee64f8421c61be309f05bfa536", }, }, "params": { diff --git a/segmentation_models_pytorch/encoders/mix_transformer.py b/segmentation_models_pytorch/encoders/mix_transformer.py index 7430dd4d..d5dca7fd 100644 --- a/segmentation_models_pytorch/encoders/mix_transformer.py +++ b/segmentation_models_pytorch/encoders/mix_transformer.py @@ -591,21 +591,15 @@ def load_state_dict(self, state_dict): return super().load_state_dict(state_dict) -def get_pretrained_cfg(name): - return { - "url": f"https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/{name}.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - } - - mix_transformer_encoders = { "mit_b0": { "encoder": MixVisionTransformerEncoder, - "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b0")}, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/mit_b0.imagenet", + "revision": "9ce53d104d92d75aabb00aae70677aaab67e7c84", + } + }, "params": { "out_channels": [3, 0, 32, 64, 160, 256], "patch_size": 4, @@ -622,7 +616,12 @@ def get_pretrained_cfg(name): }, "mit_b1": { "encoder": MixVisionTransformerEncoder, - "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b1")}, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/mit_b1.imagenet", + "revision": "a04bf4f13a549bce677cf79b04852e7510782817", + } + }, "params": { "out_channels": [3, 0, 64, 128, 320, 512], "patch_size": 4, @@ -639,7 +638,12 @@ def get_pretrained_cfg(name): }, "mit_b2": { "encoder": MixVisionTransformerEncoder, - "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b2")}, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/mit_b2.imagenet", + "revision": "868ab6f13871dcf8c3d9f90ee4519403475b65ef", + } + }, "params": { "out_channels": [3, 0, 64, 128, 320, 512], "patch_size": 4, @@ -656,7 +660,12 @@ def get_pretrained_cfg(name): }, "mit_b3": { "encoder": MixVisionTransformerEncoder, - "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b3")}, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/mit_b3.imagenet", + "revision": "32558d12a65f1daa0ebcf4f4053c4285e2c1cbda", + } + }, "params": { "out_channels": [3, 0, 64, 128, 320, 512], "patch_size": 4, @@ -673,7 +682,12 @@ def get_pretrained_cfg(name): }, "mit_b4": { "encoder": MixVisionTransformerEncoder, - "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b4")}, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/mit_b4.imagenet", + "revision": "3a3454e900a4b4f11dd60eeb59101a9a1a36b017", + } + }, "params": { "out_channels": [3, 0, 64, 128, 320, 512], "patch_size": 4, @@ -690,7 +704,12 @@ def get_pretrained_cfg(name): }, "mit_b5": { "encoder": MixVisionTransformerEncoder, - "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b5")}, + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/mit_b5.imagenet", + "revision": "ced04d96c586b6297fd59a7a1e244fc78fdb6531", + } + }, "params": { "out_channels": [3, 0, 64, 128, 320, 512], "patch_size": 4, diff --git a/segmentation_models_pytorch/encoders/mobilenet.py b/segmentation_models_pytorch/encoders/mobilenet.py index af7fc122..793a9be2 100644 --- a/segmentation_models_pytorch/encoders/mobilenet.py +++ b/segmentation_models_pytorch/encoders/mobilenet.py @@ -85,11 +85,8 @@ def load_state_dict(self, state_dict, **kwargs): "encoder": MobileNetV2Encoder, "pretrained_settings": { "imagenet": { - "url": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth", - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "input_space": "RGB", - "input_range": [0, 1], + "repo_id": "smp-hub/mobilenet_v2.imagenet", + "revision": "e67aa804e17f7b404b629127eabbd224c4e0690b", } }, "params": {"out_channels": [3, 16, 24, 32, 96, 1280]}, diff --git a/segmentation_models_pytorch/encoders/mobileone.py b/segmentation_models_pytorch/encoders/mobileone.py index 3430b978..ba2947d0 100644 --- a/segmentation_models_pytorch/encoders/mobileone.py +++ b/segmentation_models_pytorch/encoders/mobileone.py @@ -493,11 +493,8 @@ def reparameterize_model(model: torch.nn.Module) -> nn.Module: "encoder": MobileOne, "pretrained_settings": { "imagenet": { - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s0_unfused.pth.tar", # noqa - "input_space": "RGB", - "input_range": [0, 1], + "repo_id": "smp-hub/mobileone_s0.imagenet", + "revision": "f52815cf0ad29278a9860c9cd5fabf19f904bedf", } }, "params": { @@ -511,11 +508,8 @@ def reparameterize_model(model: torch.nn.Module) -> nn.Module: "encoder": MobileOne, "pretrained_settings": { "imagenet": { - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s1_unfused.pth.tar", # noqa - "input_space": "RGB", - "input_range": [0, 1], + "repo_id": "smp-hub/mobileone_s1.imagenet", + "revision": "5707a98852b762cd8e0c43b5c8c729cd28496677", } }, "params": { @@ -528,11 +522,8 @@ def reparameterize_model(model: torch.nn.Module) -> nn.Module: "encoder": MobileOne, "pretrained_settings": { "imagenet": { - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s2_unfused.pth.tar", # noqa - "input_space": "RGB", - "input_range": [0, 1], + "repo_id": "smp-hub/mobileone_s2.imagenet", + "revision": "ddc3db8fa40d271902c7a8c95cee6691f617d551", } }, "params": { @@ -545,11 +536,8 @@ def reparameterize_model(model: torch.nn.Module) -> nn.Module: "encoder": MobileOne, "pretrained_settings": { "imagenet": { - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s3_unfused.pth.tar", # noqa - "input_space": "RGB", - "input_range": [0, 1], + "repo_id": "smp-hub/mobileone_s3.imagenet", + "revision": "da89b84a91b7400c366c358bfbf8dd0b2fa4dde2", } }, "params": { @@ -562,11 +550,8 @@ def reparameterize_model(model: torch.nn.Module) -> nn.Module: "encoder": MobileOne, "pretrained_settings": { "imagenet": { - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s4_unfused.pth.tar", # noqa - "input_space": "RGB", - "input_range": [0, 1], + "repo_id": "smp-hub/mobileone_s4.imagenet", + "revision": "16197c55d599076b6aae67a83d3b3f70c31b097c", } }, "params": { diff --git a/segmentation_models_pytorch/encoders/resnet.py b/segmentation_models_pytorch/encoders/resnet.py index 383af002..d4f2db4e 100644 --- a/segmentation_models_pytorch/encoders/resnet.py +++ b/segmentation_models_pytorch/encoders/resnet.py @@ -92,224 +92,23 @@ def load_state_dict(self, state_dict, **kwargs): super().load_state_dict(state_dict, **kwargs) -pretrained_settings = { - "resnet18": { - "imagenet": { - "url": "https://download.pytorch.org/models/resnet18-5c106cde.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "ssl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet18-d92f0530.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "swsl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet18-118f1556.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - }, - "resnet34": { - "imagenet": { - "url": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "resnet50": { - "imagenet": { - "url": "https://download.pytorch.org/models/resnet50-19c8e357.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "ssl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet50-08389792.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "swsl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet50-16a12f1b.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - }, - "resnet101": { - "imagenet": { - "url": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "resnet152": { - "imagenet": { - "url": "https://download.pytorch.org/models/resnet152-b121ed2d.pth", - "input_space": "RGB", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "resnext50_32x4d": { - "imagenet": { - "url": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "ssl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext50_32x4-ddb3e555.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "swsl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext50_32x4-72679e44.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - }, - "resnext101_32x4d": { - "ssl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x4-dc43570a.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "swsl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x4-3f87e46b.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - }, - "resnext101_32x8d": { - "imagenet": { - "url": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "instagram": { - "url": "https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "ssl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x8-2cfe2f8b.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "swsl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - }, - "resnext101_32x16d": { - "instagram": { - "url": "https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "ssl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x16-15fffa57.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - "swsl": { - "url": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - }, - }, - "resnext101_32x32d": { - "instagram": { - "url": "https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, - "resnext101_32x48d": { - "instagram": { - "url": "https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth", - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - }, -} - resnet_encoders = { "resnet18": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnet18"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/resnet18.imagenet", + "revision": "3f2325ff978283d47aa6a1d6878ca20565622683", + }, + "ssl": { + "repo_id": "smp-hub/resnet18.ssl", + "revision": "d600d5116aac2e6e595f99f40612074c723c00b2", + }, + "swsl": { + "repo_id": "smp-hub/resnet18.swsl", + "revision": "0e3a35d4d8e344088c14a96eee502a88ac70eae1", + }, + }, "params": { "out_channels": [3, 64, 64, 128, 256, 512], "block": BasicBlock, @@ -318,7 +117,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnet34": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnet34"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/resnet34.imagenet", + "revision": "7a57b34f723329ff020b3f8bc41771163c519d0c", + }, + }, "params": { "out_channels": [3, 64, 64, 128, 256, 512], "block": BasicBlock, @@ -327,7 +131,20 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnet50": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnet50"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/resnet50.imagenet", + "revision": "00cb74e366966d59cd9a35af57e618af9f88efe9", + }, + "ssl": { + "repo_id": "smp-hub/resnet50.ssl", + "revision": "d07daf5b4377f3700c6ac61906b0aafbc4eca46b", + }, + "swsl": { + "repo_id": "smp-hub/resnet50.swsl", + "revision": "b9520cce124f91c6fe7eee45721a2c7954f0d8c0", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -336,7 +153,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnet101": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnet101"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/resnet101.imagenet", + "revision": "cd7c15e8c51da86ae6a084515fdb962d0c94e7d1", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -345,7 +167,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnet152": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnet152"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/resnet152.imagenet", + "revision": "951dd835e9d086628e447b484584c8983f9e1dd0", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -354,7 +181,20 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnext50_32x4d": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnext50_32x4d"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/resnext50_32x4d.imagenet", + "revision": "329793c85d62fd340ae42ae39fb905a63df872e7", + }, + "ssl": { + "repo_id": "smp-hub/resnext50_32x4d.ssl", + "revision": "9b67cff77d060c7044493a58c24d1007c1eb06c3", + }, + "swsl": { + "repo_id": "smp-hub/resnext50_32x4d.swsl", + "revision": "52e6e49da61b8e26ca691e1aef2cbb952884057d", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -365,7 +205,16 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnext101_32x4d": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnext101_32x4d"], + "pretrained_settings": { + "ssl": { + "repo_id": "smp-hub/resnext101_32x4d.ssl", + "revision": "b39796c8459084d13523b7016c3ef13a2e9e472b", + }, + "swsl": { + "repo_id": "smp-hub/resnext101_32x4d.swsl", + "revision": "3f8355b4892a31f001a832b49b2b01484d48516a", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -376,7 +225,24 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnext101_32x8d": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnext101_32x8d"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/resnext101_32x8d.imagenet", + "revision": "221af6198d03a4ee88992f78a1ee81b46a52d339", + }, + "instagram": { + "repo_id": "smp-hub/resnext101_32x8d.instagram", + "revision": "44cd927aa6e64673ffe9d31230bad44abc18b823", + }, + "ssl": { + "repo_id": "smp-hub/resnext101_32x8d.ssl", + "revision": "723a95ddeed335c9488c37c6cbef13d779ac8f97", + }, + "swsl": { + "repo_id": "smp-hub/resnext101_32x8d.swsl", + "revision": "58cf0bb65f91365470398080d9588b187d1777c4", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -387,7 +253,20 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnext101_32x16d": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnext101_32x16d"], + "pretrained_settings": { + "instagram": { + "repo_id": "smp-hub/resnext101_32x16d.instagram", + "revision": "64e8e320eeae6501185b0627b2429a68e52d050c", + }, + "ssl": { + "repo_id": "smp-hub/resnext101_32x16d.ssl", + "revision": "1283fe03fbb6aa2599b2df24095255acb93c3d5c", + }, + "swsl": { + "repo_id": "smp-hub/resnext101_32x16d.swsl", + "revision": "30ba61bbd4d6af0d955c513dbb4f557b84eb094f", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -398,7 +277,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnext101_32x32d": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnext101_32x32d"], + "pretrained_settings": { + "instagram": { + "repo_id": "smp-hub/resnext101_32x32d.instagram", + "revision": "c9405de121fdaa275a89de470fb19409e3eeaa86", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, @@ -409,7 +293,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "resnext101_32x48d": { "encoder": ResNetEncoder, - "pretrained_settings": pretrained_settings["resnext101_32x48d"], + "pretrained_settings": { + "instagram": { + "repo_id": "smp-hub/resnext101_32x48d.instagram", + "revision": "53e61a962b824ad7027409821f9ac3e3336dd024", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": Bottleneck, diff --git a/segmentation_models_pytorch/encoders/senet.py b/segmentation_models_pytorch/encoders/senet.py index ff900742..18dbfd91 100644 --- a/segmentation_models_pytorch/encoders/senet.py +++ b/segmentation_models_pytorch/encoders/senet.py @@ -172,7 +172,12 @@ def load_state_dict(self, state_dict, **kwargs): senet_encoders = { "senet154": { "encoder": SENetEncoder, - "pretrained_settings": pretrained_settings["senet154"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/senet154.imagenet", + "revision": "249f45efc9881ba560a0c480128edbc34ab87e40", + } + }, "params": { "out_channels": [3, 128, 256, 512, 1024, 2048], "block": SEBottleneck, @@ -185,7 +190,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "se_resnet50": { "encoder": SENetEncoder, - "pretrained_settings": pretrained_settings["se_resnet50"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/se_resnet50.imagenet", + "revision": "e6b4bc2dc85226c3d3474544410724a485455459", + } + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": SEResNetBottleneck, @@ -202,7 +212,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "se_resnet101": { "encoder": SENetEncoder, - "pretrained_settings": pretrained_settings["se_resnet101"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/se_resnet101.imagenet", + "revision": "71fe95cc0a27f444cf83671f354de02dc741b18b", + } + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": SEResNetBottleneck, @@ -219,7 +234,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "se_resnet152": { "encoder": SENetEncoder, - "pretrained_settings": pretrained_settings["se_resnet152"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/se_resnet152.imagenet", + "revision": "e79fc3d9d76f197bd76a2593c2054edf1083fe32", + } + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": SEResNetBottleneck, @@ -236,7 +256,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "se_resnext50_32x4d": { "encoder": SENetEncoder, - "pretrained_settings": pretrained_settings["se_resnext50_32x4d"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/se_resnext50_32x4d.imagenet", + "revision": "73246406d879a2b0e3fdfe6fddd56347d38f38ae", + } + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": SEResNeXtBottleneck, @@ -253,7 +278,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "se_resnext101_32x4d": { "encoder": SENetEncoder, - "pretrained_settings": pretrained_settings["se_resnext101_32x4d"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/se_resnext101_32x4d.imagenet", + "revision": "18808a4276f46421d358a9de554e0b93c2795df4", + } + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": SEResNeXtBottleneck, diff --git a/segmentation_models_pytorch/encoders/timm_efficientnet.py b/segmentation_models_pytorch/encoders/timm_efficientnet.py index 0dbb90b0..a1c36491 100644 --- a/segmentation_models_pytorch/encoders/timm_efficientnet.py +++ b/segmentation_models_pytorch/encoders/timm_efficientnet.py @@ -5,7 +5,7 @@ from functools import partial from timm.models.efficientnet import EfficientNet -from timm.models.efficientnet import decode_arch_def, round_channels, default_cfgs +from timm.models.efficientnet import decode_arch_def, round_channels from timm.layers.activations import Swish from ._base import EncoderMixin @@ -220,15 +220,18 @@ def prepare_settings(settings): "timm-efficientnet-b0": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b0"].cfgs["in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b0"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b0"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b0.imagenet", + "revision": "8419e9cc19da0b68dcd7bb12f19b7c92407ad7c4", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b0.advprop", + "revision": "a5870af2d24ce79e0cc7fae2bbd8e0a21fcfa6d8", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b0.noisy-student", + "revision": "bea8b0ff726a50e48774d2d360c5fb1ac4815836", + }, }, "params": { "out_channels": [3, 32, 24, 40, 112, 320], @@ -241,15 +244,18 @@ def prepare_settings(settings): "timm-efficientnet-b1": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b1"].cfgs["in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b1"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b1"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b1.imagenet", + "revision": "63bdd65ef6596ef24f1cadc7dd4f46b624442349", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b1.advprop", + "revision": "79b3d102080ef679b16c2748e608a871112233d0", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b1.noisy-student", + "revision": "36856124a699f6032574ceeefab02040daa90a9a", + }, }, "params": { "out_channels": [3, 32, 24, 40, 112, 320], @@ -262,15 +268,18 @@ def prepare_settings(settings): "timm-efficientnet-b2": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b2"].cfgs["in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b2"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b2"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b2.imagenet", + "revision": "e693adb39d3cb3847e71e3700a0c2aa58072cff1", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b2.advprop", + "revision": "b58479bf78007cfbb365091d64eeee369bddfa21", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b2.noisy-student", + "revision": "67c558827c6d3e0975ff9b4bce8557bc2ca80931", + }, }, "params": { "out_channels": [3, 32, 24, 48, 120, 352], @@ -283,15 +292,18 @@ def prepare_settings(settings): "timm-efficientnet-b3": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b3"].cfgs["in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b3"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b3"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b3.imagenet", + "revision": "1666b835b5151d6bb2067c7cd67e67ada6c39edf", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b3.advprop", + "revision": "70474cdb9f1ff4fcbd7434e66560ead1ab8e506b", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b3.noisy-student", + "revision": "2367bc9f61e79ee97684169a71a87db280bcf4db", + }, }, "params": { "out_channels": [3, 40, 32, 48, 136, 384], @@ -304,15 +316,18 @@ def prepare_settings(settings): "timm-efficientnet-b4": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b4"].cfgs["in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b4"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b4"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b4.imagenet", + "revision": "07868c28ab308f4de4cf1e7ec54b33b8b002ccdb", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b4.advprop", + "revision": "8ea1772ee9a2a0d18c1b56dce0dfac8dd33d537d", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b4.noisy-student", + "revision": "faeb77b6e8292a700380c840d39442d7ce4d6443", + }, }, "params": { "out_channels": [3, 48, 32, 56, 160, 448], @@ -325,15 +340,18 @@ def prepare_settings(settings): "timm-efficientnet-b5": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b5"].cfgs["in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b5"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b5"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b5.imagenet", + "revision": "004153b4ddd93d30afd9bbf34329d7f57396d413", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b5.advprop", + "revision": "1d1c5f05aab5ed9a1d5052847ddd4024c06a464d", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b5.noisy-student", + "revision": "9bc3a1e5490de92b1af061d5c2c474ab3129e38c", + }, }, "params": { "out_channels": [3, 48, 40, 64, 176, 512], @@ -346,15 +364,18 @@ def prepare_settings(settings): "timm-efficientnet-b6": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b6"].cfgs["aa_in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b6"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b6"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b6.imagenet", + "revision": "dbbf28a5c33f021486db4070de693caad6b56c3d", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b6.advprop", + "revision": "3b5d3412047f7711c56ffde997911cfefe79f835", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b6.noisy-student", + "revision": "9b899ea9e8e0ce2ccada0f34a8cb8b5028e9bb36", + }, }, "params": { "out_channels": [3, 56, 40, 72, 200, 576], @@ -367,15 +388,18 @@ def prepare_settings(settings): "timm-efficientnet-b7": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b7"].cfgs["aa_in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b7"].cfgs["ap_in1k"] - ), - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_b7"].cfgs["ns_jft_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b7.imagenet", + "revision": "8ef7ffccf54dad9baceb21d05b7ef86b6b70f4cc", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b7.advprop", + "revision": "fcbc576ffb939c12d5cd8dad523fdae6eb0177ca", + }, + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-b7.noisy-student", + "revision": "6b1dd73e61bf934d485d7bd4381dc3e2ab374664", + }, }, "params": { "out_channels": [3, 64, 48, 80, 224, 640], @@ -388,12 +412,14 @@ def prepare_settings(settings): "timm-efficientnet-b8": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_b8"].cfgs["ra_in1k"] - ), - "advprop": prepare_settings( - default_cfgs["tf_efficientnet_b8"].cfgs["ap_in1k"] - ), + "imagenet": { + "repo_id": "smp-hub/timm-efficientnet-b8.imagenet", + "revision": "b5e9dde35605a3a6d17ea2a727382625f9066a37", + }, + "advprop": { + "repo_id": "smp-hub/timm-efficientnet-b8.advprop", + "revision": "e43f381de72e7467383c2c80bacbb7fcb9572866", + }, }, "params": { "out_channels": [3, 72, 56, 88, 248, 704], @@ -406,12 +432,14 @@ def prepare_settings(settings): "timm-efficientnet-l2": { "encoder": EfficientNetEncoder, "pretrained_settings": { - "noisy-student": prepare_settings( - default_cfgs["tf_efficientnet_l2"].cfgs["ns_jft_in1k"] - ), - "noisy-student-475": prepare_settings( - default_cfgs["tf_efficientnet_l2"].cfgs["ns_jft_in1k_475"] - ), + "noisy-student": { + "repo_id": "smp-hub/timm-efficientnet-l2.noisy-student", + "revision": "cdc711e76d1becdd9197169f1a8bb1b2094e980c", + }, + "noisy-student-475": { + "repo_id": "smp-hub/timm-efficientnet-l2.noisy-student-475", + "revision": "35f5ba667a64bf4f3f0689daf84fc6d0f8e1311b", + }, }, "params": { "out_channels": [3, 136, 104, 176, 480, 1376], @@ -424,9 +452,10 @@ def prepare_settings(settings): "timm-tf_efficientnet_lite0": { "encoder": EfficientNetLiteEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_lite0"].cfgs["in1k"] - ) + "imagenet": { + "repo_id": "smp-hub/timm-tf_efficientnet_lite0.imagenet", + "revision": "f5729249af07e5d923fb8b16922256ce2865d108", + }, }, "params": { "out_channels": [3, 32, 24, 40, 112, 320], @@ -439,9 +468,10 @@ def prepare_settings(settings): "timm-tf_efficientnet_lite1": { "encoder": EfficientNetLiteEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_lite1"].cfgs["in1k"] - ) + "imagenet": { + "repo_id": "smp-hub/timm-tf_efficientnet_lite1.imagenet", + "revision": "7b5e3f8dbb0c13b74101773584bba7523721be72", + }, }, "params": { "out_channels": [3, 32, 24, 40, 112, 320], @@ -454,9 +484,10 @@ def prepare_settings(settings): "timm-tf_efficientnet_lite2": { "encoder": EfficientNetLiteEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_lite2"].cfgs["in1k"] - ) + "imagenet": { + "repo_id": "smp-hub/timm-tf_efficientnet_lite2.imagenet", + "revision": "cc5f6cd4c7409ebacc13292f09d369ae88547f6a", + }, }, "params": { "out_channels": [3, 32, 24, 48, 120, 352], @@ -469,9 +500,10 @@ def prepare_settings(settings): "timm-tf_efficientnet_lite3": { "encoder": EfficientNetLiteEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_lite3"].cfgs["in1k"] - ) + "imagenet": { + "repo_id": "smp-hub/timm-tf_efficientnet_lite3.imagenet", + "revision": "ab29c8402991591d66f813bbb1f061565d9b0cd0", + }, }, "params": { "out_channels": [3, 32, 32, 48, 136, 384], @@ -484,9 +516,10 @@ def prepare_settings(settings): "timm-tf_efficientnet_lite4": { "encoder": EfficientNetLiteEncoder, "pretrained_settings": { - "imagenet": prepare_settings( - default_cfgs["tf_efficientnet_lite4"].cfgs["in1k"] - ) + "imagenet": { + "repo_id": "smp-hub/timm-tf_efficientnet_lite4.imagenet", + "revision": "91a822e0f03c255b34dfb7846d3858397e50ba39", + }, }, "params": { "out_channels": [3, 32, 32, 56, 160, 448], diff --git a/segmentation_models_pytorch/encoders/timm_sknet.py b/segmentation_models_pytorch/encoders/timm_sknet.py index 12fdd822..49fda0e8 100644 --- a/segmentation_models_pytorch/encoders/timm_sknet.py +++ b/segmentation_models_pytorch/encoders/timm_sknet.py @@ -68,35 +68,15 @@ def load_state_dict(self, state_dict, **kwargs): super().load_state_dict(state_dict, **kwargs) -sknet_weights = { - "timm-skresnet18": { - "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet18_ra-4eec2804.pth" # noqa - }, - "timm-skresnet34": { - "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet34_ra-bdc0ccde.pth" # noqa - }, - "timm-skresnext50_32x4d": { - "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnext50_ra-f40e40bf.pth" # noqa - }, -} - -pretrained_settings = {} -for model_name, sources in sknet_weights.items(): - pretrained_settings[model_name] = {} - for source_name, source_url in sources.items(): - pretrained_settings[model_name][source_name] = { - "url": source_url, - "input_size": [3, 224, 224], - "input_range": [0, 1], - "mean": [0.485, 0.456, 0.406], - "std": [0.229, 0.224, 0.225], - "num_classes": 1000, - } - timm_sknet_encoders = { "timm-skresnet18": { "encoder": SkNetEncoder, - "pretrained_settings": pretrained_settings["timm-skresnet18"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/timm-skresnet18.imagenet", + "revision": "6c97652bb744d89177b68274d2fda3923a7d1f95", + }, + }, "params": { "out_channels": [3, 64, 64, 128, 256, 512], "block": SelectiveKernelBasic, @@ -107,7 +87,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "timm-skresnet34": { "encoder": SkNetEncoder, - "pretrained_settings": pretrained_settings["timm-skresnet34"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/timm-skresnet34.imagenet", + "revision": "2367796924a8182cc835ef6b5dc303917f923f99", + }, + }, "params": { "out_channels": [3, 64, 64, 128, 256, 512], "block": SelectiveKernelBasic, @@ -118,7 +103,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "timm-skresnext50_32x4d": { "encoder": SkNetEncoder, - "pretrained_settings": pretrained_settings["timm-skresnext50_32x4d"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/timm-skresnext50_32x4d.imagenet", + "revision": "50207e407cc4c6ea9e6872963db6844ca7b7b9de", + }, + }, "params": { "out_channels": [3, 64, 256, 512, 1024, 2048], "block": SelectiveKernelBottleneck, diff --git a/segmentation_models_pytorch/encoders/vgg.py b/segmentation_models_pytorch/encoders/vgg.py index 5b89a50a..1bb577fe 100644 --- a/segmentation_models_pytorch/encoders/vgg.py +++ b/segmentation_models_pytorch/encoders/vgg.py @@ -200,7 +200,12 @@ def load_state_dict(self, state_dict, **kwargs): vgg_encoders = { "vgg11": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg11"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg11.imagenet", + "revision": "ad8b90e1051c38fdbf399cf5016886a1be357390", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["A"], @@ -209,7 +214,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "vgg11_bn": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg11_bn"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg11_bn.imagenet", + "revision": "59757f9215032c9f092977092d57d26a9df7fd9c", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["A"], @@ -218,7 +228,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "vgg13": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg13"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg13.imagenet", + "revision": "1b70ff2580f101a8007a48b51e2b5d1e5925dc42", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["B"], @@ -227,7 +242,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "vgg13_bn": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg13_bn"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg13_bn.imagenet", + "revision": "9be454515193af6612261b7614fe90607e27b143", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["B"], @@ -236,7 +256,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "vgg16": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg16"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg16.imagenet", + "revision": "49d74b799006ee252b86e25acd6f1fd8ac9a99c1", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["D"], @@ -245,7 +270,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "vgg16_bn": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg16_bn"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg16_bn.imagenet", + "revision": "2c186d02fb519e93219a99a1c2af6295aef0bf0d", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["D"], @@ -254,7 +284,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "vgg19": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg19"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg19.imagenet", + "revision": "2853d00d7bca364dbb98be4d6afa347e5aeec1f6", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["E"], @@ -263,7 +298,12 @@ def load_state_dict(self, state_dict, **kwargs): }, "vgg19_bn": { "encoder": VGGEncoder, - "pretrained_settings": pretrained_settings["vgg19_bn"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/vgg19_bn.imagenet", + "revision": "f09a924cb0d201ea6f61601df9559141382271d7", + }, + }, "params": { "out_channels": [64, 128, 256, 512, 512, 512], "config": cfg["E"], diff --git a/segmentation_models_pytorch/encoders/xception.py b/segmentation_models_pytorch/encoders/xception.py index f81dc959..594636a4 100644 --- a/segmentation_models_pytorch/encoders/xception.py +++ b/segmentation_models_pytorch/encoders/xception.py @@ -87,25 +87,15 @@ def load_state_dict(self, state_dict): super().load_state_dict(state_dict) -pretrained_settings = { - "xception": { - "imagenet": { - "url": "http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth", - "input_space": "RGB", - "input_size": [3, 299, 299], - "input_range": [0, 1], - "mean": [0.5, 0.5, 0.5], - "std": [0.5, 0.5, 0.5], - "num_classes": 1000, - "scale": 0.8975, # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299 - } - } -} - xception_encoders = { "xception": { "encoder": XceptionEncoder, - "pretrained_settings": pretrained_settings["xception"], + "pretrained_settings": { + "imagenet": { + "repo_id": "smp-hub/xception.imagenet", + "revision": "01cfaf27c11353b1f0c578e7e26d2c000ea91049", + }, + }, "params": {"out_channels": [3, 64, 128, 256, 728, 2048]}, } } diff --git a/tests/encoders/test_common.py b/tests/encoders/test_common.py new file mode 100644 index 00000000..f94fd303 --- /dev/null +++ b/tests/encoders/test_common.py @@ -0,0 +1,15 @@ +import pytest +import segmentation_models_pytorch as smp +from tests.utils import slow_test + + +@pytest.mark.parametrize( + "encoder_name_and_weights", + [ + ("resnet18", "imagenet"), + ], +) +@slow_test +def test_load_encoder_from_hub(encoder_name_and_weights): + encoder_name, weights = encoder_name_and_weights + smp.encoders.get_encoder(encoder_name, weights=weights)