deep-person-reid/torchreid/models/senet.py

from __future__ import absolute_import
from __future__ import division

from collections import OrderedDict
import math

import torch
import torch.nn as nn
from torch.utils import model_zoo
from torch.nn import functional as F
import torchvision


"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""


# Public API: the model factory functions defined at the bottom of this module.
__all__ = ['senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d',
           'se_resnet50_fc512']


# ImageNet checkpoint metadata keyed by architecture name. Every entry shares
# the same preprocessing description; only the checkpoint URL differs, so the
# table is generated from (name, url) pairs. The literals inside the
# comprehension are re-evaluated per entry, so no list is shared between models.
pretrained_settings = {
    arch: {
        'imagenet': {
            'url': checkpoint_url,
            'input_space': 'RGB',
            'input_size': [3, 224, 224],
            'input_range': [0, 1],
            'mean': [0.485, 0.456, 0.406],
            'std': [0.229, 0.224, 0.225],
            'num_classes': 1000,
        },
    }
    for arch, checkpoint_url in (
        ('senet154',
         'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth'),
        ('se_resnet50',
         'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth'),
        ('se_resnet101',
         'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth'),
        ('se_resnet152',
         'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth'),
        ('se_resnext50_32x4d',
         'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth'),
        ('se_resnext101_32x4d',
         'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth'),
    )
}


class SEModule(nn.Module):
    """
    Squeeze-and-Excitation gate.

    The input is globally average-pooled to a 1x1 map, passed through a
    two-layer bottleneck of 1x1 convolutions (``channels`` ->
    ``channels // reduction`` -> ``channels``) and a sigmoid, and the result
    is multiplied back onto the input channel by channel.
    """

    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Squeeze: one value per channel.
        gate = self.fc1(self.avg_pool(x))
        # Excite: per-channel scale produced by the sigmoid.
        gate = self.fc2(self.relu(gate))
        return x * self.sigmoid(gate)


class Bottleneck(nn.Module):
    """
    Common `forward()` shared by all SE bottleneck variants.

    Subclasses must define `conv1`/`bn1`, `conv2`/`bn2`, `conv3`/`bn3`,
    `relu`, `se_module` and `downsample` (which may be `None`).
    """
    def forward(self, x):
        # Three conv/BN stages; the last one has no activation before the
        # SE gate and the residual addition.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Identity shortcut unless a projection was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        return self.relu(self.se_module(out) + shortcut)


class SEBottleneck(Bottleneck):
    """
    Bottleneck block used by SENet154.

    Channel widths are `inplanes -> 2 * planes -> 4 * planes -> 4 * planes`;
    the stride and the channel grouping are both applied in the 3x3
    convolution (`conv2`).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super(SEBottleneck, self).__init__()
        mid_channels = planes * 2
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, mid_channels, kernel_size=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)

        self.conv2 = nn.Conv2d(mid_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, groups=groups,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNetBottleneck(Bottleneck):
    """
    ResNet bottleneck augmented with a Squeeze-and-Excitation module.

    Mirrors the Caffe layout: the stride is applied by the first 1x1
    convolution (`conv1`) rather than by the 3x3 convolution (`conv2`), which
    is where the torchvision ResNet puts it.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super(SEResNetBottleneck, self).__init__()
        out_channels = planes * 4

        # Strided 1x1 reduction.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
                               stride=stride)
        self.bn1 = nn.BatchNorm2d(planes)

        # Unstrided (optionally grouped) 3x3 convolution.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # 1x1 expansion.
        self.conv3 = nn.Conv2d(planes, out_channels, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNeXtBottleneck(Bottleneck):
    """
    ResNeXt bottleneck (type C) augmented with a Squeeze-and-Excitation module.

    The inner width is `floor(planes * base_width / 64) * groups`; the stride
    and the grouping are applied in the 3x3 convolution (`conv2`).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None, base_width=4):
        super(SEResNeXtBottleneck, self).__init__()
        width = int(math.floor(planes * (base_width / 64.)) * groups)
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,
                               stride=1)
        self.bn1 = nn.BatchNorm2d(width)

        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)

        self.conv3 = nn.Conv2d(width, out_channels, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SENet(nn.Module):
    """
    Squeeze-and-Excitation network: a convolutional stem (`layer0`), four
    stages of bottleneck blocks (`layer1`...`layer4`), global average pooling,
    optional fully connected embedding layers (`fc`) and a linear `classifier`.
    """

    def __init__(self, num_classes, loss, block, layers, groups, reduction, dropout_p=0.2,
                 inplanes=128, input_3x3=True, downsample_kernel_size=3, downsample_padding=1,
                 last_stride=2, fc_dims=None, **kwargs):
        """
        Parameters
        ----------
        num_classes (int): Number of outputs in `classifier` layer.
        loss (set of str): Losses the model is trained with. `forward()`
            supports `{'xent'}` and `{'xent', 'htri'}` and raises `KeyError`
            for anything else (in training mode).
        block (nn.Module): Bottleneck class.
            - For SENet154: SEBottleneck
            - For SE-ResNet models: SEResNetBottleneck
            - For SE-ResNeXt models:  SEResNeXtBottleneck
        layers (list of ints): Number of residual blocks for 4 layers of the
            network (layer1...layer4).
        groups (int): Number of groups for the 3x3 convolution in each
            bottleneck block.
            - For SENet154: 64
            - For SE-ResNet models: 1
            - For SE-ResNeXt models:  32
        reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
            - For all models: 16
        dropout_p (float or None): Drop probability for the Dropout layer
            appended after each fully connected layer built from `fc_dims`.
            If `None` the Dropout layer is not used. Has no effect when no
            fully connected layers are constructed.
            - For SENet154: 0.2
            - For SE-ResNet models: None
            - For SE-ResNeXt models: None
        inplanes (int):  Number of input channels for layer1.
            - For SENet154: 128
            - For SE-ResNet models: 64
            - For SE-ResNeXt models: 64
        input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
            a single 7x7 convolution in layer0.
            - For SENet154: True
            - For SE-ResNet models: False
            - For SE-ResNeXt models: False
        downsample_kernel_size (int): Kernel size for downsampling convolutions
            in layer2, layer3 and layer4.
            - For SENet154: 3
            - For SE-ResNet models: 1
            - For SE-ResNeXt models: 1
        downsample_padding (int): Padding for downsampling convolutions in
            layer2, layer3 and layer4.
            - For SENet154: 1
            - For SE-ResNet models: 0
            - For SE-ResNeXt models: 0
        last_stride (int): Stride of layer4.
        fc_dims (list, tuple or None): Output dimensions of the fully
            connected layers inserted between the pooled features and the
            classifier. `None` or an empty sequence means no such layers.
        **kwargs: Accepted for interface compatibility and ignored.
        """
        super(SENet, self).__init__()
        # Running channel count; updated by `_make_layer` as stages are built.
        self.inplanes = inplanes
        self.loss = loss

        if input_3x3:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
                                    bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                    padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        # To preserve compatibility with Caffe weights `ceil_mode=True`
        # is used instead of `padding=1`.
        layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
                                                    ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(layer0_modules))

        # layer1 keeps the resolution and always uses a 1x1 projection.
        self.layer1 = self._make_layer(
            block,
            planes=64,
            blocks=layers[0],
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=1,
            downsample_padding=0
        )
        self.layer2 = self._make_layer(
            block,
            planes=128,
            blocks=layers[1],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer3 = self._make_layer(
            block,
            planes=256,
            blocks=layers[2],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer4 = self._make_layer(
            block,
            planes=512,
            blocks=layers[3],
            stride=last_stride,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )

        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        # `_construct_fc_layer` also sets `self.feature_dim`, which the
        # classifier below depends on.
        self.fc = self._construct_fc_layer(fc_dims, 512 * block.expansion, dropout_p)
        self.classifier = nn.Linear(self.feature_dim, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        """
        Build one stage of `blocks` bottleneck blocks.

        Only the first block receives `stride` and the projection shortcut;
        the projection is created when the stride or the channel count
        changes. `self.inplanes` is updated to the stage's output width.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, groups, reduction, stride,
                            downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups, reduction))

        return nn.Sequential(*layers)

    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
        """
        Construct fully connected layer

        - fc_dims (list or tuple): dimensions of fc layers, if None or empty,
                                   no fc layers are constructed
        - input_dim (int): input dimension
        - dropout_p (float): dropout probability, if None, dropout is unused

        Sets `self.feature_dim` to the dimension of the resulting feature
        vector and returns the layers as `nn.Sequential`, or `None` when no
        fc layers are constructed.
        """
        if fc_dims is None:
            self.feature_dim = input_dim
            return None

        assert isinstance(fc_dims, (list, tuple)), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))

        # An empty sequence requests no fc layers; without this guard
        # `fc_dims[-1]` below would raise IndexError.
        if not fc_dims:
            self.feature_dim = input_dim
            return None

        layers = []
        for dim in fc_dims:
            layers.append(nn.Linear(input_dim, dim))
            layers.append(nn.BatchNorm1d(dim))
            layers.append(nn.ReLU(inplace=True))
            if dropout_p is not None:
                layers.append(nn.Dropout(p=dropout_p))
            input_dim = dim

        self.feature_dim = fc_dims[-1]

        return nn.Sequential(*layers)

    def featuremaps(self, x):
        """Run the convolutional part (layer0...layer4) and return its output."""
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def forward(self, x):
        """
        In evaluation mode return the feature vector (after the fc layers, if
        any). In training mode return the classifier output for
        `loss == {'xent'}`, or `(classifier output, feature vector)` for
        `loss == {'xent', 'htri'}`; any other `loss` raises `KeyError`.
        """
        f = self.featuremaps(x)
        v = self.global_avgpool(f)
        v = v.view(v.size(0), -1)

        if self.fc is not None:
            v = self.fc(v)

        if not self.training:
            return v

        y = self.classifier(v)

        if self.loss == {'xent'}:
            return y
        elif self.loss == {'xent', 'htri'}:
            return y, v
        else:
            raise KeyError("Unsupported loss: {}".format(self.loss))


def init_pretrained_weights(model, model_url):
    """
    Load the checkpoint at `model_url` into `model`.

    Only entries whose key exists in the model and whose tensor size matches
    are copied; every other layer of the model keeps its current values.
    """
    checkpoint = model_zoo.load_url(model_url, map_location=None)
    merged_state = model.state_dict()
    for key, tensor in checkpoint.items():
        # Skip entries unknown to the model or with a different shape
        if key in merged_state and merged_state[key].size() == tensor.size():
            merged_state[key] = tensor
    model.load_state_dict(merged_state)
    print("Initialized model with pretrained weights from {}".format(model_url))


def senet154(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build SENet154: `SEBottleneck` blocks in a [3, 8, 36, 3] layout with
    64 groups, no extra fc layers.

    When `pretrained == 'imagenet'` the ImageNet checkpoint is loaded into
    the layers that match it by name and size.
    """
    net = SENet(
        block=SEBottleneck,
        layers=[3, 8, 36, 3],
        groups=64,
        reduction=16,
        dropout_p=0.2,
        last_stride=2,
        fc_dims=None,
        num_classes=num_classes,
        loss=loss,
        **kwargs
    )
    if pretrained == 'imagenet':
        checkpoint = pretrained_settings['senet154']['imagenet']
        init_pretrained_weights(net, checkpoint['url'])
    return net


def se_resnet50(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build SE-ResNet50: `SEResNetBottleneck` blocks in a [3, 4, 6, 3] layout,
    7x7 stem, no extra fc layers.

    When `pretrained == 'imagenet'` the ImageNet checkpoint is loaded into
    the layers that match it by name and size.
    """
    net = SENet(
        block=SEResNetBottleneck,
        layers=[3, 4, 6, 3],
        groups=1,
        reduction=16,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        last_stride=2,
        dropout_p=None,
        fc_dims=None,
        num_classes=num_classes,
        loss=loss,
        **kwargs
    )
    if pretrained == 'imagenet':
        checkpoint = pretrained_settings['se_resnet50']['imagenet']
        init_pretrained_weights(net, checkpoint['url'])
    return net


def se_resnet50_fc512(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build the SE-ResNet50 variant with `last_stride=1` and a single
    512-dimensional fc layer before the classifier.

    When `pretrained == 'imagenet'` the SE-ResNet50 ImageNet checkpoint is
    loaded into the layers that match it by name and size.
    """
    net = SENet(
        block=SEResNetBottleneck,
        layers=[3, 4, 6, 3],
        groups=1,
        reduction=16,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        last_stride=1,
        dropout_p=None,
        fc_dims=[512],
        num_classes=num_classes,
        loss=loss,
        **kwargs
    )
    if pretrained == 'imagenet':
        # Reuses the plain SE-ResNet50 checkpoint.
        checkpoint = pretrained_settings['se_resnet50']['imagenet']
        init_pretrained_weights(net, checkpoint['url'])
    return net


def se_resnet101(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build SE-ResNet101: `SEResNetBottleneck` blocks in a [3, 4, 23, 3]
    layout, 7x7 stem, no extra fc layers.

    When `pretrained == 'imagenet'` the ImageNet checkpoint is loaded into
    the layers that match it by name and size.
    """
    net = SENet(
        block=SEResNetBottleneck,
        layers=[3, 4, 23, 3],
        groups=1,
        reduction=16,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        last_stride=2,
        dropout_p=None,
        fc_dims=None,
        num_classes=num_classes,
        loss=loss,
        **kwargs
    )
    if pretrained == 'imagenet':
        checkpoint = pretrained_settings['se_resnet101']['imagenet']
        init_pretrained_weights(net, checkpoint['url'])
    return net


def se_resnet152(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build SE-ResNet152: `SEResNetBottleneck` blocks in a [3, 8, 36, 3]
    layout, 7x7 stem, no extra fc layers.

    When `pretrained == 'imagenet'` the ImageNet checkpoint is loaded into
    the layers that match it by name and size.
    """
    net = SENet(
        block=SEResNetBottleneck,
        layers=[3, 8, 36, 3],
        groups=1,
        reduction=16,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        last_stride=2,
        dropout_p=None,
        fc_dims=None,
        num_classes=num_classes,
        loss=loss,
        **kwargs
    )
    if pretrained == 'imagenet':
        checkpoint = pretrained_settings['se_resnet152']['imagenet']
        init_pretrained_weights(net, checkpoint['url'])
    return net


def se_resnext50_32x4d(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build SE-ResNeXt50 (32x4d): `SEResNeXtBottleneck` blocks in a
    [3, 4, 6, 3] layout with 32 groups, 7x7 stem, no extra fc layers.

    When `pretrained == 'imagenet'` the ImageNet checkpoint is loaded into
    the layers that match it by name and size.
    """
    net = SENet(
        block=SEResNeXtBottleneck,
        layers=[3, 4, 6, 3],
        groups=32,
        reduction=16,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        last_stride=2,
        dropout_p=None,
        fc_dims=None,
        num_classes=num_classes,
        loss=loss,
        **kwargs
    )
    if pretrained == 'imagenet':
        checkpoint = pretrained_settings['se_resnext50_32x4d']['imagenet']
        init_pretrained_weights(net, checkpoint['url'])
    return net


def se_resnext101_32x4d(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build SE-ResNeXt101 (32x4d): `SEResNeXtBottleneck` blocks in a
    [3, 4, 23, 3] layout with 32 groups, 7x7 stem, no extra fc layers.

    When `pretrained == 'imagenet'` the ImageNet checkpoint is loaded into
    the layers that match it by name and size.
    """
    net = SENet(
        block=SEResNeXtBottleneck,
        layers=[3, 4, 23, 3],
        groups=32,
        reduction=16,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        last_stride=2,
        dropout_p=None,
        fc_dims=None,
        num_classes=num_classes,
        loss=loss,
        **kwargs
    )
    if pretrained == 'imagenet':
        checkpoint = pretrained_settings['se_resnext101_32x4d']['imagenet']
        init_pretrained_weights(net, checkpoint['url'])
    return net
standardize code 2018-07-04 10:32:43 +01:00			`from __future__ import absolute_import`
			`from __future__ import division`
restore models 2018-07-02 10:33:10 +01:00
			`from collections import OrderedDict`
			`import math`

			`import torch`
			`import torch.nn as nn`
			`from torch.utils import model_zoo`
			`from torch.nn import functional as F`
			`import torchvision`


			`"""`
			`Code imported from https://github.com/Cadene/pretrained-models.pytorch`
			`"""`


add dropout_p; update senet 2018-10-26 23:54:13 +01:00			`__all__ = ['senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d',`
			`'se_resnet50_fc512']`
restore models 2018-07-02 10:33:10 +01:00

			`pretrained_settings = {`
			`'senet154': {`
			`'imagenet': {`
			`'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth',`
			`'input_space': 'RGB',`
			`'input_size': [3, 224, 224],`
			`'input_range': [0, 1],`
			`'mean': [0.485, 0.456, 0.406],`
			`'std': [0.229, 0.224, 0.225],`
			`'num_classes': 1000`
			`}`
			`},`
			`'se_resnet50': {`
			`'imagenet': {`
			`'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth',`
			`'input_space': 'RGB',`
			`'input_size': [3, 224, 224],`
			`'input_range': [0, 1],`
			`'mean': [0.485, 0.456, 0.406],`
			`'std': [0.229, 0.224, 0.225],`
			`'num_classes': 1000`
			`}`
			`},`
			`'se_resnet101': {`
			`'imagenet': {`
			`'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth',`
			`'input_space': 'RGB',`
			`'input_size': [3, 224, 224],`
			`'input_range': [0, 1],`
			`'mean': [0.485, 0.456, 0.406],`
			`'std': [0.229, 0.224, 0.225],`
			`'num_classes': 1000`
			`}`
			`},`
			`'se_resnet152': {`
			`'imagenet': {`
			`'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth',`
			`'input_space': 'RGB',`
			`'input_size': [3, 224, 224],`
			`'input_range': [0, 1],`
			`'mean': [0.485, 0.456, 0.406],`
			`'std': [0.229, 0.224, 0.225],`
			`'num_classes': 1000`
			`}`
			`},`
			`'se_resnext50_32x4d': {`
			`'imagenet': {`
			`'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth',`
			`'input_space': 'RGB',`
			`'input_size': [3, 224, 224],`
			`'input_range': [0, 1],`
			`'mean': [0.485, 0.456, 0.406],`
			`'std': [0.229, 0.224, 0.225],`
			`'num_classes': 1000`
			`}`
			`},`
			`'se_resnext101_32x4d': {`
			`'imagenet': {`
			`'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth',`
			`'input_space': 'RGB',`
			`'input_size': [3, 224, 224],`
			`'input_range': [0, 1],`
			`'mean': [0.485, 0.456, 0.406],`
			`'std': [0.229, 0.224, 0.225],`
			`'num_classes': 1000`
			`}`
			`},`
			`}`


			`class SEModule(nn.Module):`

			`def __init__(self, channels, reduction):`
			`super(SEModule, self).__init__()`
			`self.avg_pool = nn.AdaptiveAvgPool2d(1)`
			`self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0)`
			`self.relu = nn.ReLU(inplace=True)`
			`self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0)`
			`self.sigmoid = nn.Sigmoid()`

			`def forward(self, x):`
			`module_input = x`
			`x = self.avg_pool(x)`
			`x = self.fc1(x)`
			`x = self.relu(x)`
			`x = self.fc2(x)`
			`x = self.sigmoid(x)`
			`return module_input * x`


			`class Bottleneck(nn.Module):`
			`"""`
			Base class for bottlenecks that implements `forward()` method.
			`"""`
			`def forward(self, x):`
			`residual = x`

			`out = self.conv1(x)`
			`out = self.bn1(out)`
			`out = self.relu(out)`

			`out = self.conv2(out)`
			`out = self.bn2(out)`
			`out = self.relu(out)`

			`out = self.conv3(out)`
			`out = self.bn3(out)`

			`if self.downsample is not None:`
			`residual = self.downsample(x)`

			`out = self.se_module(out) + residual`
			`out = self.relu(out)`

			`return out`


			`class SEBottleneck(Bottleneck):`
			`"""`
			`Bottleneck for SENet154.`
			`"""`
			`expansion = 4`

			`def __init__(self, inplanes, planes, groups, reduction, stride=1,`
			`downsample=None):`
			`super(SEBottleneck, self).__init__()`
			`self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)`
			`self.bn1 = nn.BatchNorm2d(planes * 2)`
			`self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3,`
			`stride=stride, padding=1, groups=groups,`
			`bias=False)`
			`self.bn2 = nn.BatchNorm2d(planes * 4)`
			`self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1,`
			`bias=False)`
			`self.bn3 = nn.BatchNorm2d(planes * 4)`
			`self.relu = nn.ReLU(inplace=True)`
			`self.se_module = SEModule(planes * 4, reduction=reduction)`
			`self.downsample = downsample`
			`self.stride = stride`


			`class SEResNetBottleneck(Bottleneck):`
			`"""`
			`ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe`
			implementation and uses `stride=stride` in `conv1` and not in `conv2`
			`(the latter is used in the torchvision implementation of ResNet).`
			`"""`
			`expansion = 4`

			`def __init__(self, inplanes, planes, groups, reduction, stride=1,`
			`downsample=None):`
			`super(SEResNetBottleneck, self).__init__()`
			`self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,`
			`stride=stride)`
			`self.bn1 = nn.BatchNorm2d(planes)`
			`self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,`
			`groups=groups, bias=False)`
			`self.bn2 = nn.BatchNorm2d(planes)`
			`self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)`
			`self.bn3 = nn.BatchNorm2d(planes * 4)`
			`self.relu = nn.ReLU(inplace=True)`
			`self.se_module = SEModule(planes * 4, reduction=reduction)`
			`self.downsample = downsample`
			`self.stride = stride`


			`class SEResNeXtBottleneck(Bottleneck):`
			`"""`
			`ResNeXt bottleneck type C with a Squeeze-and-Excitation module.`
			`"""`
			`expansion = 4`

			`def __init__(self, inplanes, planes, groups, reduction, stride=1,`
			`downsample=None, base_width=4):`
			`super(SEResNeXtBottleneck, self).__init__()`
			`width = int(math.floor(planes * (base_width / 64.)) * groups)`
			`self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,`
			`stride=1)`
			`self.bn1 = nn.BatchNorm2d(width)`
			`self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,`
			`padding=1, groups=groups, bias=False)`
			`self.bn2 = nn.BatchNorm2d(width)`
			`self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)`
			`self.bn3 = nn.BatchNorm2d(planes * 4)`
			`self.relu = nn.ReLU(inplace=True)`
			`self.se_module = SEModule(planes * 4, reduction=reduction)`
			`self.downsample = downsample`
			`self.stride = stride`


			`class SENet(nn.Module):`

add dropout_p; update senet 2018-10-26 23:54:13 +01:00			`def __init__(self, num_classes, loss, block, layers, groups, reduction, dropout_p=0.2,`
			`inplanes=128, input_3x3=True, downsample_kernel_size=3, downsample_padding=1,`
add **kwargs to class __init__() 2018-10-27 10:35:47 +01:00			`last_stride=2, fc_dims=None, **kwargs):`
restore models 2018-07-02 10:33:10 +01:00			`"""`
			`Parameters`
			`----------`
			`block (nn.Module): Bottleneck class.`
			`- For SENet154: SEBottleneck`
			`- For SE-ResNet models: SEResNetBottleneck`
			`- For SE-ResNeXt models: SEResNeXtBottleneck`
			`layers (list of ints): Number of residual blocks for 4 layers of the`
			`network (layer1...layer4).`
			`groups (int): Number of groups for the 3x3 convolution in each`
			`bottleneck block.`
			`- For SENet154: 64`
			`- For SE-ResNet models: 1`
			`- For SE-ResNeXt models: 32`
			`reduction (int): Reduction ratio for Squeeze-and-Excitation modules.`
			`- For all models: 16`
			`dropout_p (float or None): Drop probability for the Dropout layer.`
			If `None` the Dropout layer is not used.
			`- For SENet154: 0.2`
			`- For SE-ResNet models: None`
			`- For SE-ResNeXt models: None`
			`inplanes (int): Number of input channels for layer1.`
			`- For SENet154: 128`
			`- For SE-ResNet models: 64`
			`- For SE-ResNeXt models: 64`
			input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
			`a single 7x7 convolution in layer0.`
			`- For SENet154: True`
			`- For SE-ResNet models: False`
			`- For SE-ResNeXt models: False`
			`downsample_kernel_size (int): Kernel size for downsampling convolutions`
			`in layer2, layer3 and layer4.`
			`- For SENet154: 3`
			`- For SE-ResNet models: 1`
			`- For SE-ResNeXt models: 1`
			`downsample_padding (int): Padding for downsampling convolutions in`
			`layer2, layer3 and layer4.`
			`- For SENet154: 1`
			`- For SE-ResNet models: 0`
			`- For SE-ResNeXt models: 0`
add dropout_p; update senet 2018-10-26 23:54:13 +01:00			num_classes (int): Number of outputs in `classifier` layer.
restore models 2018-07-02 10:33:10 +01:00			`"""`
			`super(SENet, self).__init__()`
			`self.inplanes = inplanes`
add self.loss 2018-10-27 10:40:20 +01:00			`self.loss = loss`

restore models 2018-07-02 10:33:10 +01:00			`if input_3x3:`
			`layer0_modules = [`
			`('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,`
			`bias=False)),`
			`('bn1', nn.BatchNorm2d(64)),`
			`('relu1', nn.ReLU(inplace=True)),`
			`('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,`
			`bias=False)),`
			`('bn2', nn.BatchNorm2d(64)),`
			`('relu2', nn.ReLU(inplace=True)),`
			`('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,`
			`bias=False)),`
			`('bn3', nn.BatchNorm2d(inplanes)),`
			`('relu3', nn.ReLU(inplace=True)),`
			`]`
			`else:`
			`layer0_modules = [`
			`('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,`
			`padding=3, bias=False)),`
			`('bn1', nn.BatchNorm2d(inplanes)),`
			`('relu1', nn.ReLU(inplace=True)),`
			`]`
			# To preserve compatibility with Caffe weights `ceil_mode=True`
			# is used instead of `padding=1`.
			`layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,`
			`ceil_mode=True)))`
			`self.layer0 = nn.Sequential(OrderedDict(layer0_modules))`
			`self.layer1 = self._make_layer(`
			`block,`
			`planes=64,`
			`blocks=layers[0],`
			`groups=groups,`
			`reduction=reduction,`
			`downsample_kernel_size=1,`
			`downsample_padding=0`
			`)`
			`self.layer2 = self._make_layer(`
			`block,`
			`planes=128,`
			`blocks=layers[1],`
			`stride=2,`
			`groups=groups,`
			`reduction=reduction,`
			`downsample_kernel_size=downsample_kernel_size,`
			`downsample_padding=downsample_padding`
			`)`
			`self.layer3 = self._make_layer(`
			`block,`
			`planes=256,`
			`blocks=layers[2],`
			`stride=2,`
			`groups=groups,`
			`reduction=reduction,`
			`downsample_kernel_size=downsample_kernel_size,`
			`downsample_padding=downsample_padding`
			`)`
			`self.layer4 = self._make_layer(`
			`block,`
			`planes=512,`
			`blocks=layers[3],`
add dropout_p; update senet 2018-10-26 23:54:13 +01:00			`stride=last_stride,`
restore models 2018-07-02 10:33:10 +01:00			`groups=groups,`
			`reduction=reduction,`
			`downsample_kernel_size=downsample_kernel_size,`
			`downsample_padding=downsample_padding`
			`)`
add dropout_p; update senet 2018-10-26 23:54:13 +01:00
			`self.global_avgpool = nn.AdaptiveAvgPool2d(1)`
			`self.fc = self._construct_fc_layer(fc_dims, 512 * block.expansion, dropout_p)`
			`self.classifier = nn.Linear(self.feature_dim, num_classes)`
restore models 2018-07-02 10:33:10 +01:00
			`def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,`
			`downsample_kernel_size=1, downsample_padding=0):`
			`downsample = None`
			`if stride != 1 or self.inplanes != planes * block.expansion:`
			`downsample = nn.Sequential(`
			`nn.Conv2d(self.inplanes, planes * block.expansion,`
			`kernel_size=downsample_kernel_size, stride=stride,`
			`padding=downsample_padding, bias=False),`
			`nn.BatchNorm2d(planes * block.expansion),`
			`)`

			`layers = []`
			`layers.append(block(self.inplanes, planes, groups, reduction, stride,`
			`downsample))`
			`self.inplanes = planes * block.expansion`
			`for i in range(1, blocks):`
			`layers.append(block(self.inplanes, planes, groups, reduction))`

			`return nn.Sequential(*layers)`

add dropout_p; update senet 2018-10-26 23:54:13 +01:00			`def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):`
			`"""`
			`Construct fully connected layer`

			`- fc_dims (list or tuple): dimensions of fc layers, if None,`
			`no fc layers are constructed`
			`- input_dim (int): input dimension`
			`- dropout_p (float): dropout probability, if None, dropout is unused`
			`"""`
			`if fc_dims is None:`
			`self.feature_dim = input_dim`
			`return None`

			`assert isinstance(fc_dims, (list, tuple)), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))`

			`layers = []`
			`for dim in fc_dims:`
			`layers.append(nn.Linear(input_dim, dim))`
			`layers.append(nn.BatchNorm1d(dim))`
			`layers.append(nn.ReLU(inplace=True))`
			`if dropout_p is not None:`
			`layers.append(nn.Dropout(p=dropout_p))`
			`input_dim = dim`

			`self.feature_dim = fc_dims[-1]`

			`return nn.Sequential(*layers)`

			`def featuremaps(self, x):`
restore models 2018-07-02 10:33:10 +01:00			`x = self.layer0(x)`
			`x = self.layer1(x)`
			`x = self.layer2(x)`
			`x = self.layer3(x)`
			`x = self.layer4(x)`
			`return x`

			`def forward(self, x):`
add dropout_p; update senet 2018-10-26 23:54:13 +01:00			`f = self.featuremaps(x)`
			`v = self.global_avgpool(f)`
			`v = v.view(v.size(0), -1)`

			`if self.fc is not None:`
			`v = self.fc(v)`

			`if not self.training:`
			`return v`

			`y = self.classifier(v)`

			`if self.loss == {'xent'}:`
			`return y`
			`elif self.loss == {'xent', 'htri'}:`
			`return y, v`
			`else:`
			`raise KeyError("Unsupported loss: {}".format(self.loss))`
restore models 2018-07-02 10:33:10 +01:00

def init_pretrained_weights(model, model_url):
    """
    Initialize model with pretrained weights.

    Only checkpoint entries whose name exists in the model and whose size
    equals the size of the model's own entry are copied; every other layer
    keeps its current values.

    - model (nn.Module): model to initialize in place
    - model_url (str): location of the checkpoint, handed to model_zoo.load_url
    """
    checkpoint = model_zoo.load_url(model_url, map_location=None)
    state = model.state_dict()

    compatible = {}
    for name, tensor in checkpoint.items():
        if name in state and state[name].size() == tensor.size():
            compatible[name] = tensor

    # Overlay the usable pretrained entries on the model's own state so that
    # load_state_dict still receives a complete set of keys.
    state.update(compatible)
    model.load_state_dict(state)
    print("Initialized model with pretrained weights from {}".format(model_url))


			`def senet154(num_classes, loss, pretrained='imagenet', **kwargs):`
			`model = SENet(`
			`num_classes=num_classes,`
			`loss=loss,`
			`block=SEBottleneck,`
			`layers=[3, 8, 36, 3],`
			`groups=64,`
			`reduction=16,`
			`dropout_p=0.2,`
			`last_stride=2,`
			`fc_dims=None,`
			`**kwargs`
			`)`
			`if pretrained == 'imagenet':`
			`init_pretrained_weights(model, pretrained_settings['senet154']['imagenet']['url'])`
restore models 2018-07-02 10:33:10 +01:00			`return model`


def se_resnet50(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build an SENet from SEResNetBottleneck blocks with layers [3, 4, 6, 3].

    - num_classes (int): forwarded to SENet
    - loss: forwarded to SENet
    - pretrained (str): if equal to 'imagenet', weights are initialized from
                        the 'se_resnet50' url in pretrained_settings
    - kwargs: extra keyword arguments forwarded to SENet
    """
    net = SENet(
        num_classes=num_classes, loss=loss,
        block=SEResNetBottleneck, layers=[3, 4, 6, 3],
        groups=1, reduction=16,
        dropout_p=None,
        inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        last_stride=2, fc_dims=None,
        **kwargs
    )

    if pretrained != 'imagenet':
        return net

    weights_url = pretrained_settings['se_resnet50']['imagenet']['url']
    init_pretrained_weights(net, weights_url)
    return net


def se_resnet50_fc512(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build an SENet from SEResNetBottleneck blocks with layers [3, 4, 6, 3],
    last_stride=1 and a single 512-d fc layer (fc_dims=[512]).

    - num_classes (int): forwarded to SENet
    - loss: forwarded to SENet
    - pretrained (str): if equal to 'imagenet', weights are initialized from
                        the 'se_resnet50' url in pretrained_settings
    - kwargs: extra keyword arguments forwarded to SENet
    """
    net = SENet(
        num_classes=num_classes, loss=loss,
        block=SEResNetBottleneck, layers=[3, 4, 6, 3],
        groups=1, reduction=16,
        dropout_p=None,
        inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        last_stride=1, fc_dims=[512],
        **kwargs
    )

    if pretrained != 'imagenet':
        return net

    # Reuses the se_resnet50 checkpoint.
    weights_url = pretrained_settings['se_resnet50']['imagenet']['url']
    init_pretrained_weights(net, weights_url)
    return net


def se_resnet101(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build an SENet from SEResNetBottleneck blocks with layers [3, 4, 23, 3].

    - num_classes (int): forwarded to SENet
    - loss: forwarded to SENet
    - pretrained (str): if equal to 'imagenet', weights are initialized from
                        the 'se_resnet101' url in pretrained_settings
    - kwargs: extra keyword arguments forwarded to SENet
    """
    net = SENet(
        num_classes=num_classes, loss=loss,
        block=SEResNetBottleneck, layers=[3, 4, 23, 3],
        groups=1, reduction=16,
        dropout_p=None,
        inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        last_stride=2, fc_dims=None,
        **kwargs
    )

    if pretrained != 'imagenet':
        return net

    weights_url = pretrained_settings['se_resnet101']['imagenet']['url']
    init_pretrained_weights(net, weights_url)
    return net


def se_resnet152(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build an SENet from SEResNetBottleneck blocks with layers [3, 8, 36, 3].

    - num_classes (int): forwarded to SENet
    - loss: forwarded to SENet
    - pretrained (str): if equal to 'imagenet', weights are initialized from
                        the 'se_resnet152' url in pretrained_settings
    - kwargs: extra keyword arguments forwarded to SENet
    """
    net = SENet(
        num_classes=num_classes, loss=loss,
        block=SEResNetBottleneck, layers=[3, 8, 36, 3],
        groups=1, reduction=16,
        dropout_p=None,
        inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        last_stride=2, fc_dims=None,
        **kwargs
    )

    if pretrained != 'imagenet':
        return net

    weights_url = pretrained_settings['se_resnet152']['imagenet']['url']
    init_pretrained_weights(net, weights_url)
    return net


def se_resnext50_32x4d(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build an SENet from SEResNeXtBottleneck blocks with layers [3, 4, 6, 3]
    and 32 groups.

    - num_classes (int): forwarded to SENet
    - loss: forwarded to SENet
    - pretrained (str): if equal to 'imagenet', weights are initialized from
                        the 'se_resnext50_32x4d' url in pretrained_settings
    - kwargs: extra keyword arguments forwarded to SENet
    """
    net = SENet(
        num_classes=num_classes, loss=loss,
        block=SEResNeXtBottleneck, layers=[3, 4, 6, 3],
        groups=32, reduction=16,
        dropout_p=None,
        inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        last_stride=2, fc_dims=None,
        **kwargs
    )

    if pretrained != 'imagenet':
        return net

    weights_url = pretrained_settings['se_resnext50_32x4d']['imagenet']['url']
    init_pretrained_weights(net, weights_url)
    return net


def se_resnext101_32x4d(num_classes, loss, pretrained='imagenet', **kwargs):
    """
    Build an SENet from SEResNeXtBottleneck blocks with layers [3, 4, 23, 3]
    and 32 groups.

    - num_classes (int): forwarded to SENet
    - loss: forwarded to SENet
    - pretrained (str): if equal to 'imagenet', weights are initialized from
                        the 'se_resnext101_32x4d' url in pretrained_settings
    - kwargs: extra keyword arguments forwarded to SENet
    """
    net = SENet(
        num_classes=num_classes, loss=loss,
        block=SEResNeXtBottleneck, layers=[3, 4, 23, 3],
        groups=32, reduction=16,
        dropout_p=None,
        inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        last_stride=2, fc_dims=None,
        **kwargs
    )

    if pretrained != 'imagenet':
        return net

    weights_url = pretrained_settings['se_resnext101_32x4d']['imagenet']['url']
    init_pretrained_weights(net, weights_url)
    return net