diff --git a/ppcls/arch/backbone/model_zoo/alexnet.py b/ppcls/arch/backbone/model_zoo/alexnet.py index 3e1d1aa52..b44901a63 100644 --- a/ppcls/arch/backbone/model_zoo/alexnet.py +++ b/ppcls/arch/backbone/model_zoo/alexnet.py @@ -23,10 +23,14 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"AlexNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"} +MODEL_URLS = { + "AlexNet": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) + class ConvPoolLayer(nn.Layer): def __init__(self, input_channels, @@ -64,7 +68,7 @@ class ConvPoolLayer(nn.Layer): class AlexNetDY(nn.Layer): - def __init__(self, class_dim=1000): + def __init__(self, class_num=1000): super(AlexNetDY, self).__init__() stdv = 1.0 / math.sqrt(3 * 11 * 11) @@ -119,7 +123,7 @@ class AlexNetDY(nn.Layer): name="fc7_offset", initializer=Uniform(-stdv, stdv))) self._fc8 = Linear( in_features=4096, - out_features=class_dim, + out_features=class_num, weight_attr=ParamAttr( name="fc8_weights", initializer=Uniform(-stdv, stdv)), bias_attr=ParamAttr( @@ -143,6 +147,7 @@ class AlexNetDY(nn.Layer): x = self._fc8(x) return x + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -155,7 +160,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): "pretrained type is not available. Please use `string` or `boolean` type." ) + def AlexNet(pretrained=False, use_ssld=False, **kwargs): model = AlexNetDY(**kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/darknet.py b/ppcls/arch/backbone/model_zoo/darknet.py index 16b4b8600..75aafd85b 100644 --- a/ppcls/arch/backbone/model_zoo/darknet.py +++ b/ppcls/arch/backbone/model_zoo/darknet.py @@ -23,10 +23,14 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"DarkNet53": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"} +MODEL_URLS = { + "DarkNet53": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) + class ConvBNLayer(nn.Layer): def __init__(self, input_channels, @@ -77,7 +81,7 @@ class BasicBlock(nn.Layer): class DarkNet(nn.Layer): - def __init__(self, class_dim=1000): + def __init__(self, class_num=1000): super(DarkNet, self).__init__() self.stages = [1, 2, 8, 8, 4] @@ -126,7 +130,7 @@ class DarkNet(nn.Layer): stdv = 1.0 / math.sqrt(1024.0) self._out = Linear( 1024, - class_dim, + class_num, weight_attr=ParamAttr( name="fc_weights", initializer=Uniform(-stdv, stdv)), bias_attr=ParamAttr(name="fc_offset")) @@ -172,6 +176,7 @@ class DarkNet(nn.Layer): x = self._out(x) return x + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -183,8 +188,10 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + + def DarkNet53(pretrained=False, use_ssld=False, **kwargs): model = DarkNet(**kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/densenet.py b/ppcls/arch/backbone/model_zoo/densenet.py index 190959b80..7e6e20251 100644 --- a/ppcls/arch/backbone/model_zoo/densenet.py +++ b/ppcls/arch/backbone/model_zoo/densenet.py @@ -28,12 +28,18 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"DenseNet121": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams", - "DenseNet161": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams", - "DenseNet169": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams", - "DenseNet201": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams", - "DenseNet264": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams", - } +MODEL_URLS = { + "DenseNet121": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams", + "DenseNet161": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams", + "DenseNet169": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams", + "DenseNet201": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams", + "DenseNet264": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -196,7 +202,7 @@ class ConvBNLayer(nn.Layer): class DenseNet(nn.Layer): - def __init__(self, layers=60, bn_size=4, dropout=0, class_dim=1000): + def __init__(self, layers=60, bn_size=4, dropout=0, class_num=1000): super(DenseNet, self).__init__() supported_layers = [121, 161, 169, 201, 264] @@ -269,7 +275,7 @@ class DenseNet(nn.Layer): self.out = Linear( num_features, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_weights"), bias_attr=ParamAttr(name="fc_offset")) @@ -289,6 +295,7 @@ class DenseNet(nn.Layer): y = self.out(y) return y + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -301,31 +308,37 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): "pretrained type is not available. Please use `string` or `boolean` type." ) + def DenseNet121(pretrained=False, use_ssld=False, **kwargs): model = DenseNet(layers=121, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld) return model def DenseNet161(pretrained=False, use_ssld=False, **kwargs): model = DenseNet(layers=161, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld) return model def DenseNet169(pretrained=False, use_ssld=False, **kwargs): model = DenseNet(layers=169, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld) return model def DenseNet201(pretrained=False, use_ssld=False, **kwargs): model = DenseNet(layers=201, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld) return model def DenseNet264(pretrained=False, use_ssld=False, **kwargs): model = DenseNet(layers=264, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py index b7c36192c..025d36123 100644 --- a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py +++ b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py @@ -19,15 +19,23 @@ from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zero from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "DeiT_tiny_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams", - "DeiT_small_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams", - "DeiT_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams", - "DeiT_tiny_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams", - "DeiT_small_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams", - "DeiT_base_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams", - "DeiT_base_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams", - "DeiT_base_distilled_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams", - } + "DeiT_tiny_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams", + "DeiT_small_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams", + "DeiT_base_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams", + "DeiT_tiny_distilled_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams", + "DeiT_small_distilled_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams", + "DeiT_base_distilled_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams", + "DeiT_base_patch16_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams", + "DeiT_base_distilled_patch16_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -36,7 +44,7 @@ class DistilledVisionTransformer(VisionTransformer): def __init__(self, img_size=224, patch_size=16, - class_dim=1000, + class_num=1000, embed_dim=768, depth=12, num_heads=12, @@ -48,7 +56,7 @@ class DistilledVisionTransformer(VisionTransformer): super().__init__( img_size=img_size, patch_size=patch_size, - class_dim=class_dim, + class_num=class_num, embed_dim=embed_dim, depth=depth, num_heads=num_heads, @@ -68,7 +76,7 @@ class DistilledVisionTransformer(VisionTransformer): self.head_dist = nn.Linear( self.embed_dim, - self.class_dim) if self.class_dim > 0 else Identity() + self.class_num) if self.class_num > 0 else Identity() trunc_normal_(self.dist_token) trunc_normal_(self.pos_embed) @@ -109,7 +117,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs): model = VisionTransformer( @@ -121,7 +129,11 @@ def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs): qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_patch16_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_tiny_patch16_224"], + use_ssld=use_ssld) return model @@ -135,7 +147,11 @@ def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs): qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_patch16_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_small_patch16_224"], + use_ssld=use_ssld) return model @@ -149,11 +165,16 @@ def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_patch16_224"], + use_ssld=use_ssld) return model -def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs): +def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, + **kwargs): model = DistilledVisionTransformer( patch_size=16, embed_dim=192, @@ -163,11 +184,17 @@ def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs): qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_distilled_patch16_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_tiny_distilled_patch16_224"], + use_ssld=use_ssld) return model -def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs): +def DeiT_small_distilled_patch16_224(pretrained=False, + use_ssld=False, + **kwargs): model = DistilledVisionTransformer( patch_size=16, embed_dim=384, @@ -177,11 +204,16 @@ def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs) qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_distilled_patch16_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_small_distilled_patch16_224"], + use_ssld=use_ssld) return model -def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs): +def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, + **kwargs): model = DistilledVisionTransformer( patch_size=16, embed_dim=768, @@ -191,7 +223,11 @@ def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs): qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_distilled_patch16_224"], + use_ssld=use_ssld) return model @@ -206,11 +242,16 @@ def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs): qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_384"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_patch16_384"], + use_ssld=use_ssld) return model -def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs): +def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, + **kwargs): model = DistilledVisionTransformer( img_size=384, patch_size=16, @@ -221,5 +262,9 @@ def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs): qkv_bias=True, epsilon=1e-6, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_384"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_distilled_patch16_384"], + use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/dla.py b/ppcls/arch/backbone/model_zoo/dla.py index 51151710e..669055aeb 100644 --- a/ppcls/arch/backbone/model_zoo/dla.py +++ b/ppcls/arch/backbone/model_zoo/dla.py @@ -23,7 +23,6 @@ from paddle.nn.initializer import Normal, Constant from ppcls.arch.backbone.base.theseus_layer import Identity from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url - MODEL_URLS = { "DLA34": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams", @@ -47,10 +46,8 @@ MODEL_URLS = { "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams" } - __all__ = MODEL_URLS.keys() - zeros_ = Constant(value=0.) ones_ = Constant(value=1.) @@ -59,15 +56,23 @@ class DlaBasic(nn.Layer): def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs): super(DlaBasic, self).__init__() self.conv1 = nn.Conv2D( - inplanes, planes, kernel_size=3, stride=stride, - padding=dilation, bias_attr=False, dilation=dilation - ) + inplanes, + planes, + kernel_size=3, + stride=stride, + padding=dilation, + bias_attr=False, + dilation=dilation) self.bn1 = nn.BatchNorm2D(planes) self.relu = nn.ReLU() self.conv2 = nn.Conv2D( - planes, planes, kernel_size=3, stride=1, - padding=dilation, bias_attr=False, dilation=dilation - ) + planes, + planes, + kernel_size=3, + stride=1, + padding=dilation, + bias_attr=False, + dilation=dilation) self.bn2 = nn.BatchNorm2D(planes) self.stride = stride @@ -91,23 +96,34 @@ class DlaBasic(nn.Layer): class DlaBottleneck(nn.Layer): expansion = 2 - def __init__(self, inplanes, outplanes, stride=1, - dilation=1, cardinality=1, base_width=64): + def __init__(self, + inplanes, + outplanes, + stride=1, + dilation=1, + cardinality=1, + base_width=64): super(DlaBottleneck, self).__init__() self.stride = stride - mid_planes = int(math.floor( - outplanes * (base_width / 64)) * cardinality) + mid_planes = int( + math.floor(outplanes * (base_width / 64)) * cardinality) mid_planes = mid_planes // self.expansion - self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False) + self.conv1 = nn.Conv2D( + inplanes, mid_planes, kernel_size=1, bias_attr=False) self.bn1 = nn.BatchNorm2D(mid_planes) self.conv2 = nn.Conv2D( - mid_planes, mid_planes, kernel_size=3, - stride=stride, padding=dilation, bias_attr=False, - dilation=dilation, groups=cardinality - ) + mid_planes, + mid_planes, + kernel_size=3, + stride=stride, + padding=dilation, + bias_attr=False, + dilation=dilation, + groups=cardinality) self.bn2 = nn.BatchNorm2D(mid_planes) - self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False) + self.conv3 = nn.Conv2D( + mid_planes, outplanes, kernel_size=1, bias_attr=False) self.bn3 = nn.BatchNorm2D(outplanes) self.relu = nn.ReLU() @@ -136,9 +152,12 @@ class DlaRoot(nn.Layer): def __init__(self, in_channels, out_channels, kernel_size, residual): super(DlaRoot, self).__init__() self.conv = nn.Conv2D( - in_channels, out_channels, 1, stride=1, - bias_attr=False, padding=(kernel_size - 1) // 2 - ) + in_channels, + out_channels, + 1, + stride=1, + bias_attr=False, + padding=(kernel_size - 1) // 2) self.bn = nn.BatchNorm2D(out_channels) self.relu = nn.ReLU() self.residual = residual @@ -155,9 +174,18 @@ class DlaRoot(nn.Layer): class DlaTree(nn.Layer): - def __init__(self, levels, block, in_channels, out_channels, - stride=1,dilation=1, cardinality=1, base_width=64, - level_root=False, root_dim=0, root_kernel_size=1, + def __init__(self, + levels, + block, + in_channels, + out_channels, + stride=1, + dilation=1, + cardinality=1, + base_width=64, + level_root=False, + root_dim=0, + root_kernel_size=1, root_residual=False): super(DlaTree, self).__init__() if root_dim == 0: @@ -168,28 +196,45 @@ class DlaTree(nn.Layer): self.downsample = nn.MaxPool2D( stride, stride=stride) if stride > 1 else Identity() self.project = Identity() - cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width) + cargs = dict( + dilation=dilation, cardinality=cardinality, base_width=base_width) if levels == 1: self.tree1 = block(in_channels, out_channels, stride, **cargs) self.tree2 = block(out_channels, out_channels, 1, **cargs) if in_channels != out_channels: self.project = nn.Sequential( - nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False), + nn.Conv2D( + in_channels, + out_channels, + kernel_size=1, + stride=1, + bias_attr=False), nn.BatchNorm2D(out_channels)) else: - cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual)) + cargs.update( + dict( + root_kernel_size=root_kernel_size, + root_residual=root_residual)) self.tree1 = DlaTree( - levels - 1, block, in_channels, - out_channels, stride, root_dim=0, **cargs - ) + levels - 1, + block, + in_channels, + out_channels, + stride, + root_dim=0, + **cargs) self.tree2 = DlaTree( - levels - 1, block, out_channels, - out_channels, root_dim=root_dim + out_channels, **cargs - ) + levels - 1, + block, + out_channels, + out_channels, + root_dim=root_dim + out_channels, + **cargs) if levels == 1: - self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual) + self.root = DlaRoot(root_dim, out_channels, root_kernel_size, + root_residual) self.level_root = level_root self.root_dim = root_dim @@ -214,12 +259,20 @@ class DlaTree(nn.Layer): class DLA(nn.Layer): - def __init__(self, levels, channels, in_chans=3, cardinality=1, - base_width=64, block=DlaBottleneck, residual_root=False, - drop_rate=0.0, class_dim=1000, with_pool=True): + def __init__(self, + levels, + channels, + in_chans=3, + cardinality=1, + base_width=64, + block=DlaBottleneck, + residual_root=False, + drop_rate=0.0, + class_num=1000, + with_pool=True): super(DLA, self).__init__() self.channels = channels - self.class_dim = class_dim + self.class_num = class_num self.with_pool = with_pool self.cardinality = cardinality self.base_width = base_width @@ -227,46 +280,72 @@ class DLA(nn.Layer): self.base_layer = nn.Sequential( nn.Conv2D( - in_chans, channels[0], kernel_size=7, - stride=1, padding=3, bias_attr=False - ), + in_chans, + channels[0], + kernel_size=7, + stride=1, + padding=3, + bias_attr=False), nn.BatchNorm2D(channels[0]), nn.ReLU()) - self.level0 = self._make_conv_level(channels[0], channels[0], levels[0]) - self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2) + self.level0 = self._make_conv_level(channels[0], channels[0], + levels[0]) + self.level1 = self._make_conv_level( + channels[0], channels[1], levels[1], stride=2) cargs = dict( - cardinality=cardinality, - base_width=base_width, - root_residual=residual_root - ) + cardinality=cardinality, + base_width=base_width, + root_residual=residual_root) self.level2 = DlaTree( - levels[2], block, channels[1], - channels[2], 2, level_root=False, **cargs - ) + levels[2], + block, + channels[1], + channels[2], + 2, + level_root=False, + **cargs) self.level3 = DlaTree( - levels[3], block, channels[2], - channels[3], 2, level_root=True, **cargs - ) + levels[3], + block, + channels[2], + channels[3], + 2, + level_root=True, + **cargs) self.level4 = DlaTree( - levels[4], block, channels[3], - channels[4], 2, level_root=True, **cargs - ) + levels[4], + block, + channels[3], + channels[4], + 2, + level_root=True, + **cargs) self.level5 = DlaTree( - levels[5], block, channels[4], - channels[5], 2, level_root=True, **cargs - ) + levels[5], + block, + channels[4], + channels[5], + 2, + level_root=True, + **cargs) self.feature_info = [ # rare to have a meaningful stride 1 level - dict(num_chs=channels[0], reduction=1, module='level0'), - dict(num_chs=channels[1], reduction=2, module='level1'), - dict(num_chs=channels[2], reduction=4, module='level2'), - dict(num_chs=channels[3], reduction=8, module='level3'), - dict(num_chs=channels[4], reduction=16, module='level4'), - dict(num_chs=channels[5], reduction=32, module='level5'), + dict( + num_chs=channels[0], reduction=1, module='level0'), + dict( + num_chs=channels[1], reduction=2, module='level1'), + dict( + num_chs=channels[2], reduction=4, module='level2'), + dict( + num_chs=channels[3], reduction=8, module='level3'), + dict( + num_chs=channels[4], reduction=16, module='level4'), + dict( + num_chs=channels[5], reduction=32, module='level5'), ] self.num_features = channels[-1] @@ -274,8 +353,8 @@ class DLA(nn.Layer): if with_pool: self.global_pool = nn.AdaptiveAvgPool2D(1) - if class_dim > 0: - self.fc = nn.Conv2D(self.num_features, class_dim, 1) + if class_num > 0: + self.fc = nn.Conv2D(self.num_features, class_num, 1) for m in self.sublayers(): if isinstance(m, nn.Conv2D): @@ -291,12 +370,14 @@ class DLA(nn.Layer): for i in range(convs): modules.extend([ nn.Conv2D( - inplanes, planes, kernel_size=3, + inplanes, + planes, + kernel_size=3, stride=stride if i == 0 else 1, - padding=dilation, bias_attr=False, dilation=dilation - ), - nn.BatchNorm2D(planes), - nn.ReLU()]) + padding=dilation, + bias_attr=False, + dilation=dilation), nn.BatchNorm2D(planes), nn.ReLU() + ]) inplanes = planes return nn.Sequential(*modules) @@ -321,7 +402,7 @@ class DLA(nn.Layer): if self.drop_rate > 0.: x = F.dropout(x, p=self.drop_rate, training=self.training) - if self.class_dim > 0: + if self.class_num > 0: x = self.fc(x) x = x.flatten(1) @@ -342,124 +423,104 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def DLA34(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 2, 2, 1), - channels=(16, 32, 64, 128, 256, 512), - block=DlaBasic, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 2, 2, 1), + channels=(16, 32, 64, 128, 256, 512), + block=DlaBasic, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA34"]) return model def DLA46_c(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 2, 2, 1), - channels=(16, 32, 64, 64, 128, 256), - block=DlaBottleneck, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 2, 2, 1), + channels=(16, 32, 64, 64, 128, 256), + block=DlaBottleneck, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"]) return model def DLA46x_c(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 2, 2, 1), - channels=(16, 32, 64, 64, 128, 256), - block=DlaBottleneck, - cardinality=32, - base_width=4, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 2, 2, 1), + channels=(16, 32, 64, 64, 128, 256), + block=DlaBottleneck, + cardinality=32, + base_width=4, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"]) return model def DLA60(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 2, 3, 1), - channels=(16, 32, 128, 256, 512, 1024), - block=DlaBottleneck, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 2, 3, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA60"]) return model def DLA60x(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 2, 3, 1), - channels=(16, 32, 128, 256, 512, 1024), - block=DlaBottleneck, - cardinality=32, - base_width=4, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 2, 3, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + cardinality=32, + base_width=4, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"]) return model def DLA60x_c(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 2, 3, 1), - channels=(16, 32, 64, 64, 128, 256), - block=DlaBottleneck, - cardinality=32, - base_width=4, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 2, 3, 1), + channels=(16, 32, 64, 64, 128, 256), + block=DlaBottleneck, + cardinality=32, + base_width=4, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"]) return model def DLA102(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 3, 4, 1), - channels=(16, 32, 128, 256, 512, 1024), - block=DlaBottleneck, - residual_root=True, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 3, 4, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + residual_root=True, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA102"]) return model def DLA102x(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 3, 4, 1), - channels=(16, 32, 128, 256, 512, 1024), - block=DlaBottleneck, - cardinality=32, - base_width=4, - residual_root=True, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 3, 4, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + cardinality=32, + base_width=4, + residual_root=True, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"]) return model def DLA102x2(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 1, 3, 4, 1), - channels=(16, 32, 128, 256, 512, 1024), - block=DlaBottleneck, - cardinality=64, - base_width=4, - residual_root=True, - **kwargs - ) + model = DLA(levels=(1, 1, 1, 3, 4, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + cardinality=64, + base_width=4, + residual_root=True, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"]) return model def DLA169(pretrained=False, **kwargs): - model = DLA( - levels=(1, 1, 2, 3, 5, 1), - channels=(16, 32, 128, 256, 512, 1024), - block=DlaBottleneck, - residual_root=True, - **kwargs - ) + model = DLA(levels=(1, 1, 2, 3, 5, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + residual_root=True, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DLA169"]) return model diff --git a/ppcls/arch/backbone/model_zoo/dpn.py b/ppcls/arch/backbone/model_zoo/dpn.py index 7741eb7ce..55953ed20 100644 --- a/ppcls/arch/backbone/model_zoo/dpn.py +++ b/ppcls/arch/backbone/model_zoo/dpn.py @@ -29,12 +29,18 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"DPN68": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams", - "DPN92": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams", - "DPN98": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams", - "DPN107": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams", - "DPN131": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams", - } +MODEL_URLS = { + "DPN68": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams", + "DPN92": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams", + "DPN98": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams", + "DPN107": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams", + "DPN131": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -211,10 +217,10 @@ class DualPathFactory(nn.Layer): class DPN(nn.Layer): - def __init__(self, layers=68, class_dim=1000): + def __init__(self, layers=68, class_num=1000): super(DPN, self).__init__() - self._class_dim = class_dim + self._class_num = class_num args = self.get_net_args(layers) bws = args['bw'] @@ -309,7 +315,7 @@ class DPN(nn.Layer): self.out = Linear( out_channel, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_weights"), bias_attr=ParamAttr(name="fc_offset")) @@ -400,7 +406,8 @@ class DPN(nn.Layer): net_arg['init_padding'] = init_padding return net_arg - + + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -411,7 +418,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): else: raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." - ) + ) def DPN68(pretrained=False, use_ssld=False, **kwargs): @@ -441,4 +448,4 @@ def DPN107(pretrained=False, use_ssld=False, **kwargs): def DPN131(pretrained=False, use_ssld=False, **kwargs): model = DPN(layers=131, **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["DPN131"]) - return model \ No newline at end of file + return model diff --git a/ppcls/arch/backbone/model_zoo/efficientnet.py b/ppcls/arch/backbone/model_zoo/efficientnet.py index de2d52459..22b7fd1d8 100644 --- a/ppcls/arch/backbone/model_zoo/efficientnet.py +++ b/ppcls/arch/backbone/model_zoo/efficientnet.py @@ -11,16 +11,26 @@ import copy from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"EfficientNetB0_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams", - "EfficientNetB0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams", - "EfficientNetB1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams", - "EfficientNetB2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams", - "EfficientNetB3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams", - "EfficientNetB4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams", - "EfficientNetB5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams", - "EfficientNetB6": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams", - "EfficientNetB7": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams", - } +MODEL_URLS = { + "EfficientNetB0_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams", + "EfficientNetB0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams", + "EfficientNetB1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams", + "EfficientNetB2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams", + "EfficientNetB3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams", + "EfficientNetB4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams", + "EfficientNetB5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams", + "EfficientNetB6": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams", + "EfficientNetB7": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -725,7 +735,7 @@ class EfficientNet(nn.Layer): padding_type="SAME", override_params=None, use_se=True, - class_dim=1000): + class_num=1000): super(EfficientNet, self).__init__() model_name = 'efficientnet-' + name @@ -778,7 +788,7 @@ class EfficientNet(nn.Layer): param_attr, bias_attr = init_fc_layer("_fc") self._fc = Linear( output_channels, - class_dim, + class_num, weight_attr=param_attr, bias_attr=bias_attr) @@ -792,7 +802,7 @@ class EfficientNet(nn.Layer): x = self._fc(x) return x - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -803,14 +813,14 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): else: raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." - ) + ) def EfficientNetB0_small(padding_type='DYNAMIC', override_params=None, use_se=False, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b0', @@ -825,8 +835,8 @@ def EfficientNetB0_small(padding_type='DYNAMIC', def EfficientNetB0(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b0', @@ -841,8 +851,8 @@ def EfficientNetB0(padding_type='SAME', def EfficientNetB1(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b1', @@ -857,8 +867,8 @@ def EfficientNetB1(padding_type='SAME', def EfficientNetB2(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b2', @@ -873,8 +883,8 @@ def EfficientNetB2(padding_type='SAME', def EfficientNetB3(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b3', @@ -889,8 +899,8 @@ def EfficientNetB3(padding_type='SAME', def EfficientNetB4(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b4', @@ -905,8 +915,8 @@ def EfficientNetB4(padding_type='SAME', def EfficientNetB5(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b5', @@ -921,8 +931,8 @@ def EfficientNetB5(padding_type='SAME', def EfficientNetB6(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b6', @@ -937,8 +947,8 @@ def EfficientNetB6(padding_type='SAME', def EfficientNetB7(padding_type='SAME', override_params=None, use_se=True, - pretrained=False, - use_ssld=False, + pretrained=False, + use_ssld=False, **kwargs): model = EfficientNet( name='b7', @@ -947,4 +957,4 @@ def EfficientNetB7(padding_type='SAME', use_se=use_se, **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"]) - return model \ No newline at end of file + return model diff --git a/ppcls/arch/backbone/model_zoo/ghostnet.py b/ppcls/arch/backbone/model_zoo/ghostnet.py index e557e0f9f..4a16d8a57 100644 --- a/ppcls/arch/backbone/model_zoo/ghostnet.py +++ b/ppcls/arch/backbone/model_zoo/ghostnet.py @@ -23,10 +23,14 @@ from paddle.nn.initializer import Uniform, KaimingNormal from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"GhostNet_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams", - "GhostNet_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams", - "GhostNet_x1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams", - } +MODEL_URLS = { + "GhostNet_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams", + "GhostNet_x1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams", + "GhostNet_x1_3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -215,7 +219,7 @@ class GhostBottleneck(nn.Layer): class GhostNet(nn.Layer): - def __init__(self, scale, class_dim=1000): + def __init__(self, scale, class_num=1000): super(GhostNet, self).__init__() self.cfgs = [ # k, t, c, SE, s @@ -290,7 +294,7 @@ class GhostNet(nn.Layer): stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0) self.fc_1 = Linear( self._fc0_output_channels, - class_dim, + class_num, weight_attr=ParamAttr( name="fc_1_weights", initializer=Uniform(-stdv, stdv)), bias_attr=ParamAttr(name="fc_1_offset")) @@ -322,7 +326,7 @@ class GhostNet(nn.Layer): new_v += divisor return new_v - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -338,17 +342,20 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs): model = GhostNet(scale=0.5, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld) return model def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs): model = GhostNet(scale=1.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld) return model def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs): model = GhostNet(scale=1.3, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/googlenet.py b/ppcls/arch/backbone/model_zoo/googlenet.py index 7ef35a964..00b7feeb9 100644 --- a/ppcls/arch/backbone/model_zoo/googlenet.py +++ b/ppcls/arch/backbone/model_zoo/googlenet.py @@ -10,8 +10,10 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"GoogLeNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams", - } +MODEL_URLS = { + "GoogLeNet": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -101,7 +103,7 @@ class Inception(nn.Layer): class GoogLeNetDY(nn.Layer): - def __init__(self, class_dim=1000): + def __init__(self, class_num=1000): super(GoogLeNetDY, self).__init__() self._conv = ConvLayer(3, 64, 7, 2, name="conv1") self._pool = MaxPool2D(kernel_size=3, stride=2) @@ -134,7 +136,7 @@ class GoogLeNetDY(nn.Layer): self._drop = Dropout(p=0.4, mode="downscale_in_infer") self._fc_out = Linear( 1024, - class_dim, + class_num, weight_attr=xavier(1024, 1, "out"), bias_attr=ParamAttr(name="out_offset")) self._pool_o1 = AvgPool2D(kernel_size=5, stride=3) @@ -147,7 +149,7 @@ class GoogLeNetDY(nn.Layer): self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer") self._out1 = Linear( 1024, - class_dim, + class_num, weight_attr=xavier(1024, 1, "out1"), bias_attr=ParamAttr(name="out1_offset")) self._pool_o2 = AvgPool2D(kernel_size=5, stride=3) @@ -160,7 +162,7 @@ class GoogLeNetDY(nn.Layer): self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer") self._out2 = Linear( 1024, - class_dim, + class_num, weight_attr=xavier(1024, 1, "out2"), bias_attr=ParamAttr(name="out2_offset")) @@ -205,8 +207,8 @@ class GoogLeNetDY(nn.Layer): x = self._drop_o2(x) out2 = self._out2(x) return [out, out1, out2] - - + + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -222,5 +224,6 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def GoogLeNet(pretrained=False, use_ssld=False, **kwargs): model = GoogLeNetDY(**kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/gvt.py b/ppcls/arch/backbone/model_zoo/gvt.py index 659be4964..8453cc27a 100644 --- a/ppcls/arch/backbone/model_zoo/gvt.py +++ b/ppcls/arch/backbone/model_zoo/gvt.py @@ -25,18 +25,23 @@ from .vision_transformer import Block as ViTBlock from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "pcpvt_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams", - "pcpvt_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams", - "pcpvt_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams", - "alt_gvt_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams", - "alt_gvt_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams", - "alt_gvt_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams" - } + "pcpvt_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams", + "pcpvt_base": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams", + "pcpvt_large": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams", + "alt_gvt_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams", + "alt_gvt_base": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams", + "alt_gvt_large": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) - class GroupAttention(nn.Layer): """LSA: self attention within a group. """ @@ -522,7 +527,7 @@ class ALTGVT(PCPVT): img_size=224, patch_size=4, in_chans=3, - class_dim=1000, + class_num=1000, embed_dims=[64, 128, 256], num_heads=[1, 2, 4], mlp_ratios=[4, 4, 4], @@ -536,7 +541,7 @@ class ALTGVT(PCPVT): sr_ratios=[4, 2, 1], block_cls=GroupBlock, wss=[7, 7, 7]): - super().__init__(img_size, patch_size, in_chans, class_dim, embed_dims, + super().__init__(img_size, patch_size, in_chans, class_num, embed_dims, num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate, attn_drop_rate, drop_path_rate, norm_layer, depths, sr_ratios, block_cls) @@ -568,6 +573,7 @@ class ALTGVT(PCPVT): cur += depths[k] self.apply(self._init_weights) + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -593,7 +599,8 @@ def pcpvt_small(pretrained=False, use_ssld=False, **kwargs): depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld) return model @@ -609,7 +616,8 @@ def pcpvt_base(pretrained=False, use_ssld=False, **kwargs): depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1], **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld) return model @@ -625,7 +633,8 @@ def pcpvt_large(pretrained=False, use_ssld=False, **kwargs): depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1], **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld) return model @@ -642,7 +651,8 @@ def alt_gvt_small(pretrained=False, use_ssld=False, **kwargs): wss=[7, 7, 7, 7], sr_ratios=[8, 4, 2, 1], **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld) return model @@ -659,7 +669,8 @@ def alt_gvt_base(pretrained=False, use_ssld=False, **kwargs): wss=[7, 7, 7, 7], sr_ratios=[8, 4, 2, 1], **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld) return model @@ -676,5 +687,6 @@ def alt_gvt_large(pretrained=False, use_ssld=False, **kwargs): wss=[7, 7, 7, 7], sr_ratios=[8, 4, 2, 1], **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/hardnet.py b/ppcls/arch/backbone/model_zoo/hardnet.py index b3d5f9a45..112dc3dd8 100644 --- a/ppcls/arch/backbone/model_zoo/hardnet.py +++ b/ppcls/arch/backbone/model_zoo/hardnet.py @@ -17,7 +17,6 @@ import paddle.nn as nn from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url - MODEL_URLS = { 'HarDNet39_ds': 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams', @@ -29,51 +28,70 @@ MODEL_URLS = { 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams' } - __all__ = MODEL_URLS.keys() -def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False): +def ConvLayer(in_channels, + out_channels, + kernel_size=3, + stride=1, + bias_attr=False): layer = nn.Sequential( ('conv', nn.Conv2D( - in_channels, out_channels, kernel_size=kernel_size, - stride=stride, padding=kernel_size//2, groups=1, bias_attr=bias_attr - )), - ('norm', nn.BatchNorm2D(out_channels)), - ('relu', nn.ReLU6()) - ) + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=1, + bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)), + ('relu', nn.ReLU6())) return layer -def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False): +def DWConvLayer(in_channels, + out_channels, + kernel_size=3, + stride=1, + bias_attr=False): layer = nn.Sequential( ('dwconv', nn.Conv2D( - in_channels, out_channels, kernel_size=kernel_size, - stride=stride, padding=1, groups=out_channels, bias_attr=bias_attr - )), - ('norm', nn.BatchNorm2D(out_channels)) - ) + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=1, + groups=out_channels, + bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels))) return layer def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1): layer = nn.Sequential( - ('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)), - ('layer2', DWConvLayer(out_channels, out_channels, stride=stride)) - ) + ('layer1', ConvLayer( + in_channels, out_channels, kernel_size=kernel_size)), + ('layer2', DWConvLayer( + out_channels, out_channels, stride=stride))) return layer class HarDBlock(nn.Layer): - def __init__(self, in_channels, growth_rate, grmul, n_layers, - keepBase=False, residual_out=False, dwconv=False): + def __init__(self, + in_channels, + growth_rate, + grmul, + n_layers, + keepBase=False, + residual_out=False, + dwconv=False): super().__init__() self.keepBase = keepBase self.links = [] layers_ = [] self.out_channels = 0 # if upsample else in_channels for i in range(n_layers): - outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul) + outch, inch, link = self.get_link(i + 1, in_channels, growth_rate, + grmul) self.links.append(link) if dwconv: layers_.append(CombConvLayer(inch, outch)) @@ -92,7 +110,7 @@ class HarDBlock(nn.Layer): link = [] for i in range(10): - dv = 2 ** i + dv = 2**i if layer % dv == 0: k = layer - dv link.append(k) @@ -126,7 +144,7 @@ class HarDBlock(nn.Layer): t = len(layers_) out_ = [] for i in range(t): - if (i == 0 and self.keepBase) or (i == t-1) or (i % 2 == 1): + if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1): out_.append(layers_[i]) out = paddle.concat(out_, 1) @@ -134,8 +152,11 @@ class HarDBlock(nn.Layer): class HarDNet(nn.Layer): - def __init__(self, depth_wise=False, arch=85, - class_dim=1000, with_pool=True): + def __init__(self, + depth_wise=False, + arch=85, + class_num=1000, + with_pool=True): super().__init__() first_ch = [32, 64] second_kernel = 3 @@ -146,16 +167,16 @@ class HarDNet(nn.Layer): # HarDNet68 ch_list = [128, 256, 320, 640, 1024] gr = [14, 16, 20, 40, 160] - n_layers = [8, 16, 16, 16, 4] - downSamp = [1, 0, 1, 1, 0] + n_layers = [8, 16, 16, 16, 4] + downSamp = [1, 0, 1, 1, 0] if arch == 85: # HarDNet85 first_ch = [48, 96] ch_list = [192, 256, 320, 480, 720, 1280] - gr = [24, 24, 28, 36, 48, 256] - n_layers = [8, 16, 16, 16, 16, 4] - downSamp = [1, 0, 1, 0, 1, 0] + gr = [24, 24, 28, 36, 48, 256] + n_layers = [8, 16, 16, 16, 16, 4] + downSamp = [1, 0, 1, 0, 1, 0] drop_rate = 0.2 elif arch == 39: @@ -163,9 +184,9 @@ class HarDNet(nn.Layer): first_ch = [24, 48] ch_list = [96, 320, 640, 1024] grmul = 1.6 - gr = [16, 20, 64, 160] - n_layers = [4, 16, 8, 4] - downSamp = [1, 1, 1, 0] + gr = [16, 20, 64, 160] + n_layers = [4, 16, 8, 4] + downSamp = [1, 1, 1, 0] if depth_wise: second_kernel = 1 @@ -177,12 +198,17 @@ class HarDNet(nn.Layer): # First Layer: Standard Conv3x3, Stride=2 self.base.append( - ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3, - stride=2, bias_attr=False)) + ConvLayer( + in_channels=3, + out_channels=first_ch[0], + kernel_size=3, + stride=2, + bias_attr=False)) # Second Layer self.base.append( - ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel)) + ConvLayer( + first_ch[0], first_ch[1], kernel_size=second_kernel)) # Maxpooling or DWConv3x3 downsampling if max_pool: @@ -197,7 +223,7 @@ class HarDNet(nn.Layer): ch = blk.out_channels self.base.append(blk) - if i == blks-1 and arch == 85: + if i == blks - 1 and arch == 85: self.base.append(nn.Dropout(0.1)) self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1)) @@ -208,17 +234,17 @@ class HarDNet(nn.Layer): else: self.base.append(DWConvLayer(ch, ch, stride=2)) - ch = ch_list[blks-1] + ch = ch_list[blks - 1] layers = [] if with_pool: layers.append(nn.AdaptiveAvgPool2D((1, 1))) - if class_dim > 0: + if class_num > 0: layers.append(nn.Flatten()) layers.append(nn.Dropout(drop_rate)) - layers.append(nn.Linear(ch, class_dim)) + layers.append(nn.Linear(ch, class_num)) self.base.append(nn.Sequential(*layers)) diff --git a/ppcls/arch/backbone/model_zoo/inception_v4.py b/ppcls/arch/backbone/model_zoo/inception_v4.py index 37cef5c20..e0460d48b 100644 --- a/ppcls/arch/backbone/model_zoo/inception_v4.py +++ b/ppcls/arch/backbone/model_zoo/inception_v4.py @@ -23,7 +23,10 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"InceptionV4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"} +MODEL_URLS = { + "InceptionV4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) @@ -392,7 +395,7 @@ class InceptionC(nn.Layer): class InceptionV4DY(nn.Layer): - def __init__(self, class_dim=1000): + def __init__(self, class_num=1000): super(InceptionV4DY, self).__init__() self._inception_stem = InceptionStem() @@ -420,7 +423,7 @@ class InceptionV4DY(nn.Layer): stdv = 1.0 / math.sqrt(1536 * 1.0) self.out = Linear( 1536, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="final_fc_weights"), bias_attr=ParamAttr(name="final_fc_offset")) @@ -466,7 +469,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): "pretrained type is not available. Please use `string` or `boolean` type." ) + def InceptionV4(pretrained=False, use_ssld=False, **kwargs): model = InceptionV4DY(**kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/levit.py b/ppcls/arch/backbone/model_zoo/levit.py index bb74e00c6..78d013d65 100644 --- a/ppcls/arch/backbone/model_zoo/levit.py +++ b/ppcls/arch/backbone/model_zoo/levit.py @@ -27,12 +27,17 @@ from .vision_transformer import trunc_normal_, zeros_, ones_, Identity from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "LeViT_128S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams", - "LeViT_128": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams", - "LeViT_192": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams", - "LeViT_256": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams", - "LeViT_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams", - } + "LeViT_128S": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams", + "LeViT_128": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams", + "LeViT_192": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams", + "LeViT_256": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams", + "LeViT_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -326,7 +331,7 @@ class LeViT(nn.Layer): img_size=224, patch_size=16, in_chans=3, - class_dim=1000, + class_num=1000, embed_dim=[192], key_dim=[64], depth=[12], @@ -341,7 +346,7 @@ class LeViT(nn.Layer): drop_path=0): super().__init__() - self.class_dim = class_dim + self.class_num = class_num self.num_features = embed_dim[-1] self.embed_dim = embed_dim self.distillation = distillation @@ -403,10 +408,10 @@ class LeViT(nn.Layer): # Classifier head self.head = BN_Linear(embed_dim[-1], - class_dim) if class_dim > 0 else Identity() + class_num) if class_num > 0 else Identity() if distillation: self.head_dist = BN_Linear( - embed_dim[-1], class_dim) if class_dim > 0 else Identity() + embed_dim[-1], class_num) if class_num > 0 else Identity() def forward(self, x): x = self.patch_embed(x) @@ -423,7 +428,7 @@ class LeViT(nn.Layer): return x -def model_factory(C, D, X, N, drop_path, class_dim, distillation): +def model_factory(C, D, X, N, drop_path, class_num, distillation): embed_dim = [int(x) for x in C.split('_')] num_heads = [int(x) for x in N.split('_')] depth = [int(x) for x in X.split('_')] @@ -444,7 +449,7 @@ def model_factory(C, D, X, N, drop_path, class_dim, distillation): attention_activation=act, mlp_activation=act, hybrid_backbone=b16(embed_dim[0], activation=act), - class_dim=class_dim, + class_num=class_num, drop_path=drop_path, distillation=distillation) @@ -489,6 +494,7 @@ specification = { }, } + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -502,46 +508,71 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): ) -def LeViT_128S(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs): +def LeViT_128S(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): model = model_factory( **specification['LeViT_128S'], - class_dim=class_dim, + class_num=class_num, distillation=distillation) - _load_pretrained(pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld) return model -def LeViT_128(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs): +def LeViT_128(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): model = model_factory( **specification['LeViT_128'], - class_dim=class_dim, + class_num=class_num, distillation=distillation) - _load_pretrained(pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld) return model -def LeViT_192(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs): +def LeViT_192(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): model = model_factory( **specification['LeViT_192'], - class_dim=class_dim, + class_num=class_num, distillation=distillation) - _load_pretrained(pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld) return model -def LeViT_256(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs): +def LeViT_256(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): model = model_factory( **specification['LeViT_256'], - class_dim=class_dim, + class_num=class_num, distillation=distillation) - _load_pretrained(pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld) return model -def LeViT_384(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs): +def LeViT_384(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): model = model_factory( **specification['LeViT_384'], - class_dim=class_dim, + class_num=class_num, distillation=distillation) - _load_pretrained(pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/mixnet.py b/ppcls/arch/backbone/model_zoo/mixnet.py index 13582acb8..db460173d 100644 --- a/ppcls/arch/backbone/model_zoo/mixnet.py +++ b/ppcls/arch/backbone/model_zoo/mixnet.py @@ -25,9 +25,14 @@ import paddle.nn as nn from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"MixNet_S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams", - "MixNet_M": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams", - "MixNet_L": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"} +MODEL_URLS = { + "MixNet_S": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams", + "MixNet_M": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams", + "MixNet_L": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) @@ -617,7 +622,7 @@ class MixNet(nn.Layer): Number of input channels. in_size : tuple of two ints, default (224, 224) Spatial size of the expected input image. - class_dim : int, default 1000 + class_num : int, default 1000 Number of classification classes. """ @@ -632,10 +637,10 @@ class MixNet(nn.Layer): se_factors, in_channels=3, in_size=(224, 224), - class_dim=1000): + class_num=1000): super(MixNet, self).__init__() self.in_size = in_size - self.class_dim = class_dim + self.class_num = class_num self.features = nn.Sequential() self.features.add_sublayer( @@ -687,7 +692,7 @@ class MixNet(nn.Layer): kernel_size=7, stride=1)) self.output = nn.Linear( - in_features=in_channels, out_features=class_dim) + in_features=in_channels, out_features=class_num) def forward(self, x): x = self.features(x) @@ -773,9 +778,11 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): "pretrained type is not available. Please use `string` or `boolean` type." ) + def MixNet_S(pretrained=False, use_ssld=False, **kwargs): model = InceptionV4DY(**kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld) return model @@ -786,7 +793,8 @@ def MixNet_S(**kwargs): """ model = get_mixnet( version="s", width_scale=1.0, model_name="MixNet_S", **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld) return model @@ -797,7 +805,8 @@ def MixNet_M(**kwargs): """ model = get_mixnet( version="m", width_scale=1.0, model_name="MixNet_M", **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld) return model @@ -808,6 +817,6 @@ def MixNet_L(**kwargs): """ model = get_mixnet( version="m", width_scale=1.3, model_name="MixNet_L", **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld) return model - diff --git a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py index 4cafd1461..b32c0250b 100644 --- a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py +++ b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py @@ -28,12 +28,20 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"MobileNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams", - "MobileNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams", - "MobileNetV2_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams", - "MobileNetV2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams", - "MobileNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams", - "MobileNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"} +MODEL_URLS = { + "MobileNetV2_x0_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams", + "MobileNetV2_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams", + "MobileNetV2_x0_75": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams", + "MobileNetV2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams", + "MobileNetV2_x1_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams", + "MobileNetV2_x2_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) @@ -155,10 +163,10 @@ class InvresiBlocks(nn.Layer): class MobileNet(nn.Layer): - def __init__(self, class_dim=1000, scale=1.0, prefix_name=""): + def __init__(self, class_num=1000, scale=1.0, prefix_name=""): super(MobileNet, self).__init__() self.scale = scale - self.class_dim = class_dim + self.class_num = class_num bottleneck_params_list = [ (1, 16, 1, 1), @@ -209,7 +217,7 @@ class MobileNet(nn.Layer): self.out = Linear( self.out_c, - class_dim, + class_num, weight_attr=ParamAttr(name=prefix_name + "fc10_weights"), bias_attr=ParamAttr(name=prefix_name + "fc10_offset")) @@ -222,8 +230,8 @@ class MobileNet(nn.Layer): y = paddle.flatten(y, start_axis=1, stop_axis=-1) y = self.out(y) return y - - + + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -235,39 +243,45 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs): model = MobileNet(scale=0.25, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld) return model def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs): model = MobileNet(scale=0.5, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld) return model def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs): model = MobileNet(scale=0.75, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld) return model def MobileNetV2(pretrained=False, use_ssld=False, **kwargs): model = MobileNet(scale=1.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld) return model def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs): model = MobileNet(scale=1.5, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld) return model def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs): model = MobileNet(scale=2.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/rednet.py b/ppcls/arch/backbone/model_zoo/rednet.py index a113a32ac..12802d59c 100644 --- a/ppcls/arch/backbone/model_zoo/rednet.py +++ b/ppcls/arch/backbone/model_zoo/rednet.py @@ -19,7 +19,6 @@ from paddle.vision.models import resnet from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url - MODEL_URLS = { "RedNet26": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams", @@ -33,7 +32,6 @@ MODEL_URLS = { "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams" } - __all__ = MODEL_URLS.keys() @@ -51,50 +49,53 @@ class Involution(nn.Layer): in_channels=channels, out_channels=channels // reduction_ratio, kernel_size=1, - bias_attr=False - )), + bias_attr=False)), ('bn', nn.BatchNorm2D(channels // reduction_ratio)), - ('activate', nn.ReLU()) - ) - self.conv2 = nn.Sequential( - ('conv', nn.Conv2D( - in_channels=channels // reduction_ratio, - out_channels=kernel_size**2 * self.groups, - kernel_size=1, - stride=1 - )) - ) + ('activate', nn.ReLU())) + self.conv2 = nn.Sequential(('conv', nn.Conv2D( + in_channels=channels // reduction_ratio, + out_channels=kernel_size**2 * self.groups, + kernel_size=1, + stride=1))) if stride > 1: self.avgpool = nn.AvgPool2D(stride, stride) def forward(self, x): - weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x))) + weight = self.conv2( + self.conv1(x if self.stride == 1 else self.avgpool(x))) b, c, h, w = weight.shape - weight = weight.reshape((b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2) + weight = weight.reshape( + (b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2) - out = nn.functional.unfold(x, self.kernel_size, self.stride, (self.kernel_size-1)//2, 1) - out = out.reshape((b, self.groups, self.group_channels, self.kernel_size**2, h, w)) + out = nn.functional.unfold(x, self.kernel_size, self.stride, + (self.kernel_size - 1) // 2, 1) + out = out.reshape( + (b, self.groups, self.group_channels, self.kernel_size**2, h, w)) out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w)) return out class BottleneckBlock(resnet.BottleneckBlock): - def __init__(self, inplanes, planes, stride=1, downsample=None, - groups=1, base_width=64, dilation=1, norm_layer=None): - super(BottleneckBlock, self).__init__( - inplanes, planes, stride, downsample, - groups, base_width, dilation, norm_layer - ) + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1, + norm_layer=None): + super(BottleneckBlock, self).__init__(inplanes, planes, stride, + downsample, groups, base_width, + dilation, norm_layer) width = int(planes * (base_width / 64.)) * groups self.conv2 = Involution(width, 7, stride) class RedNet(resnet.ResNet): - def __init__(self, block, depth, class_dim=1000, with_pool=True): + def __init__(self, block, depth, class_num=1000, with_pool=True): super(RedNet, self).__init__( - block=block, depth=50, - num_classes=class_dim, with_pool=with_pool - ) + block=block, depth=50, num_classes=class_num, with_pool=with_pool) layer_cfg = { 26: [1, 2, 4, 1], 38: [2, 3, 5, 2], @@ -108,7 +109,7 @@ class RedNet(resnet.ResNet): self.bn1 = None self.relu = None self.inplanes = 64 - self.class_dim = class_dim + self.class_num = class_num self.stem = nn.Sequential( nn.Sequential( ('conv', nn.Conv2D( @@ -117,11 +118,9 @@ class RedNet(resnet.ResNet): kernel_size=3, stride=2, padding=1, - bias_attr=False - )), + bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes // 2)), - ('activate', nn.ReLU()) - ), + ('activate', nn.ReLU())), Involution(self.inplanes // 2, 3, 1), nn.BatchNorm2D(self.inplanes // 2), nn.ReLU(), @@ -132,12 +131,8 @@ class RedNet(resnet.ResNet): kernel_size=3, stride=1, padding=1, - bias_attr=False - )), - ('bn', nn.BatchNorm2D(self.inplanes)), - ('activate', nn.ReLU()) - ) - ) + bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes)), + ('activate', nn.ReLU()))) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2) @@ -156,7 +151,7 @@ class RedNet(resnet.ResNet): if self.with_pool: x = self.avgpool(x) - if self.class_dim > 0: + if self.class_num > 0: x = paddle.flatten(x, 1) x = self.fc(x) diff --git a/ppcls/arch/backbone/model_zoo/regnet.py b/ppcls/arch/backbone/model_zoo/regnet.py index 86802ee7e..549bd1617 100644 --- a/ppcls/arch/backbone/model_zoo/regnet.py +++ b/ppcls/arch/backbone/model_zoo/regnet.py @@ -28,13 +28,20 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"RegNetX_200MF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams", - "RegNetX_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams", - "RegNetX_32GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams", - "RegNetY_200MF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams", - "RegNetY_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams", - "RegNetY_32GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams", - } +MODEL_URLS = { + "RegNetX_200MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams", + "RegNetX_4GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams", + "RegNetX_32GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams", + "RegNetY_200MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams", + "RegNetY_4GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams", + "RegNetY_32GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -235,7 +242,7 @@ class RegNet(nn.Layer): bot_mul, q=8, se_on=False, - class_dim=1000): + class_num=1000): super(RegNet, self).__init__() # Generate RegNet ws per block @@ -301,7 +308,7 @@ class RegNet(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), bias_attr=ParamAttr(name="fc_0.b_0")) @@ -315,7 +322,7 @@ class RegNet(nn.Layer): y = self.out(y) return y - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -327,12 +334,20 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - - + + def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs): model = RegNet( - w_a=36.44, w_0=24, w_m=2.49, d=13, group_w=8, bot_mul=1.0, q=8, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld) + w_a=36.44, + w_0=24, + w_m=2.49, + d=13, + group_w=8, + bot_mul=1.0, + q=8, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld) return model @@ -346,7 +361,8 @@ def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs): bot_mul=1.0, q=8, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld) return model @@ -360,7 +376,8 @@ def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs): bot_mul=1.0, q=8, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) return model @@ -375,7 +392,8 @@ def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs): q=8, se_on=True, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) return model @@ -390,7 +408,8 @@ def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs): q=8, se_on=True, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) return model @@ -405,5 +424,6 @@ def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs): q=8, se_on=True, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/repvgg.py b/ppcls/arch/backbone/model_zoo/repvgg.py index 2447fbe25..94b9355ea 100644 --- a/ppcls/arch/backbone/model_zoo/repvgg.py +++ b/ppcls/arch/backbone/model_zoo/repvgg.py @@ -4,24 +4,37 @@ import numpy as np from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"RepVGG_A0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams", - "RepVGG_A1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams", - "RepVGG_A2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams", - "RepVGG_B0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams", - "RepVGG_B1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams", - "RepVGG_B2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams", - "RepVGG_B3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams", - "RepVGG_B1g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams", - "RepVGG_B1g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams", - "RepVGG_B2g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams", - "RepVGG_B2g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams", - "RepVGG_B3g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams", - "RepVGG_B3g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams", - } +MODEL_URLS = { + "RepVGG_A0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams", + "RepVGG_A1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams", + "RepVGG_A2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams", + "RepVGG_B0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams", + "RepVGG_B1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams", + "RepVGG_B2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams", + "RepVGG_B3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams", + "RepVGG_B1g2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams", + "RepVGG_B1g4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams", + "RepVGG_B2g2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams", + "RepVGG_B2g4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams", + "RepVGG_B3g2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams", + "RepVGG_B3g4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) - optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26] g2_map = {l: 2 for l in optional_groupwise_layers} g4_map = {l: 4 for l in optional_groupwise_layers} @@ -174,7 +187,7 @@ class RepVGG(nn.Layer): num_blocks, width_multiplier=None, override_groups_map=None, - class_dim=1000): + class_num=1000): super(RepVGG, self).__init__() assert len(width_multiplier) == 4 @@ -200,7 +213,7 @@ class RepVGG(nn.Layer): self.stage4 = self._make_stage( int(512 * width_multiplier[3]), num_blocks[3], stride=2) self.gap = nn.AdaptiveAvgPool2D(output_size=1) - self.linear = nn.Linear(int(512 * width_multiplier[3]), class_dim) + self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num) def _make_stage(self, planes, num_blocks, stride): strides = [stride] + [1] * (num_blocks - 1) @@ -248,7 +261,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs): model = RepVGG( @@ -256,7 +269,8 @@ def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[0.75, 0.75, 0.75, 2.5], override_groups_map=None, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld) return model @@ -266,7 +280,8 @@ def RepVGG_A1(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[1, 1, 1, 2.5], override_groups_map=None, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld) return model @@ -276,7 +291,8 @@ def RepVGG_A2(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[1.5, 1.5, 1.5, 2.75], override_groups_map=None, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld) return model @@ -286,7 +302,8 @@ def RepVGG_B0(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[1, 1, 1, 2.5], override_groups_map=None, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld) return model @@ -296,7 +313,8 @@ def RepVGG_B1(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[2, 2, 2, 4], override_groups_map=None, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld) return model @@ -306,7 +324,8 @@ def RepVGG_B1g2(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[2, 2, 2, 4], override_groups_map=g2_map, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld) return model @@ -316,7 +335,8 @@ def RepVGG_B1g4(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[2, 2, 2, 4], override_groups_map=g4_map, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld) return model @@ -326,7 +346,8 @@ def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=None, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld) return model @@ -336,7 +357,8 @@ def RepVGG_B2g2(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g2_map, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld) return model @@ -346,7 +368,8 @@ def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g4_map, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld) return model @@ -356,7 +379,8 @@ def RepVGG_B3(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[3, 3, 3, 5], override_groups_map=None, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld) return model @@ -366,7 +390,8 @@ def RepVGG_B3g2(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[3, 3, 3, 5], override_groups_map=g2_map, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld) return model @@ -376,5 +401,6 @@ def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs): width_multiplier=[3, 3, 3, 5], override_groups_map=g4_map, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/res2net.py b/ppcls/arch/backbone/model_zoo/res2net.py index 15a9427c2..191cc849c 100644 --- a/ppcls/arch/backbone/model_zoo/res2net.py +++ b/ppcls/arch/backbone/model_zoo/res2net.py @@ -29,9 +29,12 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"Res2Net50_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams", - "Res2Net50_14w_8s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams", - } +MODEL_URLS = { + "Res2Net50_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams", + "Res2Net50_14w_8s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -151,7 +154,7 @@ class BottleneckBlock(nn.Layer): class Res2Net(nn.Layer): - def __init__(self, layers=50, scales=4, width=26, class_dim=1000): + def __init__(self, layers=50, scales=4, width=26, class_num=1000): super(Res2Net, self).__init__() self.layers = layers @@ -218,7 +221,7 @@ class Res2Net(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_weights"), bias_attr=ParamAttr(name="fc_offset")) @@ -245,15 +248,17 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def Res2Net50_26w_4s(pretrained=False, use_ssld=False, **kwargs): model = Res2Net(layers=50, scales=4, width=26, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld) return model def Res2Net50_14w_8s(pretrained=False, use_ssld=False, **kwargs): model = Res2Net(layers=50, scales=8, width=14, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld) - return model \ No newline at end of file + _load_pretrained( + pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld) + return model diff --git a/ppcls/arch/backbone/model_zoo/res2net_vd.py b/ppcls/arch/backbone/model_zoo/res2net_vd.py index 28ab03a01..a37567980 100644 --- a/ppcls/arch/backbone/model_zoo/res2net_vd.py +++ b/ppcls/arch/backbone/model_zoo/res2net_vd.py @@ -29,10 +29,14 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"Res2Net50_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams", - "Res2Net101_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams", - "Res2Net200_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams", - } +MODEL_URLS = { + "Res2Net50_vd_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams", + "Res2Net101_vd_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams", + "Res2Net200_vd_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -159,7 +163,7 @@ class BottleneckBlock(nn.Layer): class Res2Net_vd(nn.Layer): - def __init__(self, layers=50, scales=4, width=26, class_dim=1000): + def __init__(self, layers=50, scales=4, width=26, class_num=1000): super(Res2Net_vd, self).__init__() self.layers = layers @@ -240,7 +244,7 @@ class Res2Net_vd(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_weights"), bias_attr=ParamAttr(name="fc_offset")) @@ -273,17 +277,29 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def Res2Net50_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs): model = Res2Net_vd(layers=50, scales=4, width=26, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_vd_26w_4s"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["Res2Net50_vd_26w_4s"], + use_ssld=use_ssld) return model def Res2Net101_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs): model = Res2Net_vd(layers=101, scales=4, width=26, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Res2Net101_vd_26w_4s"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["Res2Net101_vd_26w_4s"], + use_ssld=use_ssld) return model def Res2Net200_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs): model = Res2Net_vd(layers=200, scales=4, width=26, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Res2Net200_vd_26w_4s"], use_ssld=use_ssld) - return model \ No newline at end of file + _load_pretrained( + pretrained, + model, + MODEL_URLS["Res2Net200_vd_26w_4s"], + use_ssld=use_ssld) + return model diff --git a/ppcls/arch/backbone/model_zoo/resnest.py b/ppcls/arch/backbone/model_zoo/resnest.py index 3160095ef..a414c29f5 100644 --- a/ppcls/arch/backbone/model_zoo/resnest.py +++ b/ppcls/arch/backbone/model_zoo/resnest.py @@ -29,10 +29,14 @@ from paddle.regularizer import L2Decay from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"ResNeSt50_fast_1s1x64d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams", - "ResNeSt50": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams", - "ResNeSt101": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams", - } +MODEL_URLS = { + "ResNeSt50_fast_1s1x64d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams", + "ResNeSt50": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams", + "ResNeSt101": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -453,7 +457,7 @@ class ResNeSt(nn.Layer): avd_first=False, final_drop=0.0, last_gamma=False, - class_dim=1000): + class_num=1000): super(ResNeSt, self).__init__() self.cardinality = groups @@ -643,7 +647,7 @@ class ResNeSt(nn.Layer): self.out = Linear( self.out_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=nn.initializer.Uniform(-stdv, stdv), name="fc_weights"), @@ -663,7 +667,7 @@ class ResNeSt(nn.Layer): x = self.out(x) return x - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -675,8 +679,8 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - - + + def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs): model = ResNeSt( layers=[3, 4, 6, 3], @@ -690,7 +694,11 @@ def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs): avd_first=True, final_drop=0.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt50_fast_1s1x64d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeSt50_fast_1s1x64d"], + use_ssld=use_ssld) return model @@ -707,7 +715,8 @@ def ResNeSt50(pretrained=False, use_ssld=False, **kwargs): avd_first=False, final_drop=0.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld) return model @@ -724,5 +733,6 @@ def ResNeSt101(pretrained=False, use_ssld=False, **kwargs): avd_first=False, final_drop=0.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/resnet_vc.py b/ppcls/arch/backbone/model_zoo/resnet_vc.py index 53b9f8d5e..6b972dc7b 100644 --- a/ppcls/arch/backbone/model_zoo/resnet_vc.py +++ b/ppcls/arch/backbone/model_zoo/resnet_vc.py @@ -30,8 +30,9 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "ResNet50_vc": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams", - } + "ResNet50_vc": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -177,7 +178,7 @@ class BasicBlock(nn.Layer): class ResNet_vc(nn.Layer): - def __init__(self, layers=50, class_dim=1000): + def __init__(self, layers=50, class_num=1000): super(ResNet_vc, self).__init__() self.layers = layers @@ -270,7 +271,7 @@ class ResNet_vc(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), bias_attr=ParamAttr(name="fc_0.b_0")) @@ -287,7 +288,7 @@ class ResNet_vc(nn.Layer): y = self.out(y) return y - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -300,9 +301,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): "pretrained type is not available. Please use `string` or `boolean` type." ) - + def ResNet50_vc(pretrained=False, use_ssld=False, **kwargs): model = ResNet_vc(layers=50, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld) return model - diff --git a/ppcls/arch/backbone/model_zoo/resnext.py b/ppcls/arch/backbone/model_zoo/resnext.py index 5104b4cba..1aef81144 100644 --- a/ppcls/arch/backbone/model_zoo/resnext.py +++ b/ppcls/arch/backbone/model_zoo/resnext.py @@ -30,13 +30,19 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "ResNeXt50_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams", - "ResNeXt50_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams", - "ResNeXt101_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams", - "ResNeXt101_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams", - "ResNeXt152_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams", - "ResNeXt152_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams", - } + "ResNeXt50_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams", + "ResNeXt50_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams", + "ResNeXt101_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams", + "ResNeXt101_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams", + "ResNeXt152_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams", + "ResNeXt152_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -143,7 +149,12 @@ class BottleneckBlock(nn.Layer): class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32, input_image_channel=3, data_format="NCHW"): + def __init__(self, + layers=50, + class_num=1000, + cardinality=32, + input_image_channel=3, + data_format="NCHW"): super(ResNeXt, self).__init__() self.layers = layers @@ -176,7 +187,8 @@ class ResNeXt(nn.Layer): act='relu', name="res_conv1", data_format=self.data_format) - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1, data_format=self.data_format) + self.pool2d_max = MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=self.data_format) self.block_list = [] for block in range(len(depth)): @@ -211,7 +223,7 @@ class ResNeXt(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_weights"), bias_attr=ParamAttr(name="fc_offset")) @@ -230,7 +242,7 @@ class ResNeXt(nn.Layer): y = self.out(y) return y - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -246,35 +258,41 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=50, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld) return model def ResNeXt50_64x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=50, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld) return model def ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=101, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld) return model def ResNeXt101_64x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=101, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld) return model def ResNeXt152_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=152, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld) return model def ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=152, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py index e530a9a2b..2b3c88b55 100644 --- a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py +++ b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py @@ -9,17 +9,19 @@ from paddle.nn.initializer import Uniform from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "ResNeXt101_32x8d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams", - "ResNeXt101_32x16d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x816_wsl_pretrained.pdparams", - "ResNeXt101_32x32d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams", - "ResNeXt101_32x48d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams", - - } + "ResNeXt101_32x8d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams", + "ResNeXt101_32x16d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x816_wsl_pretrained.pdparams", + "ResNeXt101_32x32d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams", + "ResNeXt101_32x48d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) - class ConvBNLayer(nn.Layer): def __init__(self, input_channels, @@ -128,10 +130,10 @@ class BottleneckBlock(nn.Layer): class ResNeXt101WSL(nn.Layer): - def __init__(self, layers=101, cardinality=32, width=48, class_dim=1000): + def __init__(self, layers=101, cardinality=32, width=48, class_num=1000): super(ResNeXt101WSL, self).__init__() - self.class_dim = class_dim + self.class_num = class_num self.layers = layers self.cardinality = cardinality @@ -384,7 +386,7 @@ class ResNeXt101WSL(nn.Layer): self._avg_pool = AdaptiveAvgPool2D(1) self._out = Linear( num_filters[3] // (width // 8), - class_dim, + class_num, weight_attr=ParamAttr(name="fc.weight"), bias_attr=ParamAttr(name="fc.bias")) @@ -434,7 +436,7 @@ class ResNeXt101WSL(nn.Layer): x = self._out(x) return x - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -450,23 +452,39 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def ResNeXt101_32x8d_wsl(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt101WSL(cardinality=32, width=8, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x8d_wsl"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x8d_wsl"], + use_ssld=use_ssld) return model def ResNeXt101_32x16d_wsl(**args): model = ResNeXt101WSL(cardinality=32, width=16, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x16d_ws"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x16d_ws"], + use_ssld=use_ssld) return model def ResNeXt101_32x32d_wsl(**args): model = ResNeXt101WSL(cardinality=32, width=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x32d_wsl"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x32d_wsl"], + use_ssld=use_ssld) return model def ResNeXt101_32x48d_wsl(**args): model = ResNeXt101WSL(cardinality=32, width=48, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x48d_wsl"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x48d_wsl"], + use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/resnext_vd.py b/ppcls/arch/backbone/model_zoo/resnext_vd.py index b14e265e9..b2bd484f3 100644 --- a/ppcls/arch/backbone/model_zoo/resnext_vd.py +++ b/ppcls/arch/backbone/model_zoo/resnext_vd.py @@ -30,16 +30,23 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams", - "ResNeXt50_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams", - "ResNeXt101_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams", - "ResNeXt101_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams", - "ResNeXt152_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams", - "ResNeXt152_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams", - } + "ResNeXt50_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams", + "ResNeXt50_vd_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams", + "ResNeXt101_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams", + "ResNeXt101_vd_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams", + "ResNeXt152_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams", + "ResNeXt152_vd_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) + class ConvBNLayer(nn.Layer): def __init__( self, @@ -145,7 +152,7 @@ class BottleneckBlock(nn.Layer): class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32): + def __init__(self, layers=50, class_num=1000, cardinality=32): super(ResNeXt, self).__init__() self.layers = layers @@ -225,7 +232,7 @@ class ResNeXt(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc_weights"), bias_attr=ParamAttr(name="fc_offset")) @@ -242,6 +249,7 @@ class ResNeXt(nn.Layer): y = self.out(y) return y + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -257,35 +265,53 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=50, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld) return model def ResNeXt50_vd_64x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=50, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld) return model def ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=101, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_vd_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_vd_32x4d"], + use_ssld=use_ssld) return model def ResNeXt101_vd_64x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=101, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_vd_64x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_vd_64x4d"], + use_ssld=use_ssld) return model def ResNeXt152_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=152, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_vd_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt152_vd_32x4d"], + use_ssld=use_ssld) return model def ResNeXt152_vd_64x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=152, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_vd_64x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt152_vd_64x4d"], + use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/rexnet.py b/ppcls/arch/backbone/model_zoo/rexnet.py index 799826c94..039f6c538 100644 --- a/ppcls/arch/backbone/model_zoo/rexnet.py +++ b/ppcls/arch/backbone/model_zoo/rexnet.py @@ -25,12 +25,17 @@ from math import ceil from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "ReXNet_1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams", - "ReXNet_1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams", - "ReXNet_1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_32x4d_pretrained.pdparams", - "ReXNet_2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams", - "ReXNet_3_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams", - } + "ReXNet_1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams", + "ReXNet_1_3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams", + "ReXNet_1_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_32x4d_pretrained.pdparams", + "ReXNet_2_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams", + "ReXNet_3_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -154,7 +159,7 @@ class ReXNetV1(nn.Layer): final_ch=180, width_mult=1.0, depth_mult=1.0, - class_dim=1000, + class_num=1000, use_se=True, se_ratio=12, dropout_ratio=0.2, @@ -220,7 +225,7 @@ class ReXNetV1(nn.Layer): self.output = nn.Sequential( nn.Dropout(dropout_ratio), nn.Conv2D( - pen_channels, class_dim, 1, bias_attr=True)) + pen_channels, class_num, 1, bias_attr=True)) def forward(self, x): x = self.features(x) @@ -239,33 +244,38 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - - + + def ReXNet_1_0(pretrained=False, use_ssld=False, **kwargs): model = ReXNetV1(width_mult=1.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld) return model def ReXNet_1_3(pretrained=False, use_ssld=False, **kwargs): model = ReXNetV1(width_mult=1.3, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld) return model def ReXNet_1_5(pretrained=False, use_ssld=False, **kwargs): model = ReXNetV1(width_mult=1.5, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld) return model def ReXNet_2_0(pretrained=False, use_ssld=False, **kwargs): model = ReXNetV1(width_mult=2.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld) return model def ReXNet_3_0(pretrained=False, use_ssld=False, **kwargs): model = ReXNetV1(width_mult=3.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld) - return model \ No newline at end of file + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld) + return model diff --git a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py index cc48f8d36..205feec54 100644 --- a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py +++ b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py @@ -29,11 +29,13 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "SE_ResNet18_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams", - "SE_ResNet34_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams", - "SE_ResNet50_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams", - - } + "SE_ResNet18_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams", + "SE_ResNet34_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams", + "SE_ResNet50_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -240,7 +242,7 @@ class SELayer(nn.Layer): class SE_ResNet_vd(nn.Layer): - def __init__(self, layers=50, class_dim=1000): + def __init__(self, layers=50, class_num=1000): super(SE_ResNet_vd, self).__init__() self.layers = layers @@ -336,7 +338,7 @@ class SE_ResNet_vd(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc6_weights"), bias_attr=ParamAttr(name="fc6_offset")) @@ -353,7 +355,7 @@ class SE_ResNet_vd(nn.Layer): y = self.out(y) return y - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -365,21 +367,24 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def SE_ResNet18_vd(pretrained=False, use_ssld=False, **kwargs): model = SE_ResNet_vd(layers=18, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld) return model def SE_ResNet34_vd(pretrained=False, use_ssld=False, **kwargs): model = SE_ResNet_vd(layers=34, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld) return model def SE_ResNet50_vd(pretrained=False, use_ssld=False, **kwargs): model = SE_ResNet_vd(layers=50, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/se_resnext.py b/ppcls/arch/backbone/model_zoo/se_resnext.py index d873d8111..8b7149e26 100644 --- a/ppcls/arch/backbone/model_zoo/se_resnext.py +++ b/ppcls/arch/backbone/model_zoo/se_resnext.py @@ -30,11 +30,13 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "SE_ResNeXt50_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams", - "SE_ResNeXt101_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams", - "SE_ResNeXt152_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams", - - } + "SE_ResNeXt50_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams", + "SE_ResNeXt101_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams", + "SE_ResNeXt152_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -148,7 +150,12 @@ class BottleneckBlock(nn.Layer): class SELayer(nn.Layer): - def __init__(self, num_channels, num_filters, reduction_ratio, name=None, data_format="NCHW"): + def __init__(self, + num_channels, + num_filters, + reduction_ratio, + name=None, + data_format="NCHW"): super(SELayer, self).__init__() self.data_format = data_format @@ -193,7 +200,12 @@ class SELayer(nn.Layer): class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32, input_image_channel=3, data_format="NCHW"): + def __init__(self, + layers=50, + class_num=1000, + cardinality=32, + input_image_channel=3, + data_format="NCHW"): super(ResNeXt, self).__init__() self.layers = layers @@ -254,7 +266,8 @@ class ResNeXt(nn.Layer): name="conv3", data_format=self.data_format) - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1, data_format=self.data_format) + self.pool2d_max = MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=self.data_format) self.block_list = [] n = 1 if layers == 50 or layers == 101 else 3 @@ -286,13 +299,13 @@ class ResNeXt(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc6_weights"), bias_attr=ParamAttr(name="fc6_offset")) def forward(self, inputs): - with paddle.static.amp.fp16_guard(): + with paddle.static.amp.fp16_guard(): if self.data_format == "NHWC": inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1]) inputs.stop_gradient = True @@ -310,7 +323,7 @@ class ResNeXt(nn.Layer): y = self.out(y) return y - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -322,21 +335,30 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def SE_ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=50, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld) return model def SE_ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=101, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt101_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SE_ResNeXt101_32x4d"], + use_ssld=use_ssld) return model def SE_ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=152, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt152_64x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SE_ResNeXt152_64x4d"], + use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py index 5e840f83d..b23b0d2d5 100644 --- a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py +++ b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py @@ -30,11 +30,13 @@ import math from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "SE_ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams", - "SE_ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams", - "SENet154_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams", - - } + "SE_ResNeXt50_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams", + "SE_ResNeXt50_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams", + "SENet154_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -184,7 +186,7 @@ class SELayer(nn.Layer): class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32): + def __init__(self, layers=50, class_num=1000, cardinality=32): super(ResNeXt, self).__init__() self.layers = layers @@ -261,7 +263,7 @@ class ResNeXt(nn.Layer): self.out = Linear( self.pool2d_avg_channels, - class_dim, + class_num, weight_attr=ParamAttr( initializer=Uniform(-stdv, stdv), name="fc6_weights"), bias_attr=ParamAttr(name="fc6_offset")) @@ -278,7 +280,7 @@ class ResNeXt(nn.Layer): y = self.out(y) return y - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -290,21 +292,30 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def SE_ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=50, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt50_vd_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SE_ResNeXt50_vd_32x4d"], + use_ssld=use_ssld) return model def SE_ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=101, cardinality=32, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt101_vd_32x4d"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SE_ResNeXt101_vd_32x4d"], + use_ssld=use_ssld) return model def SENet154_vd(pretrained=False, use_ssld=False, **kwargs): model = ResNeXt(layers=152, cardinality=64, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py index 29abad66e..d8bb69ffe 100644 --- a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py +++ b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py @@ -25,14 +25,21 @@ from paddle.nn.functional import swish from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "ShuffleNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams", - "ShuffleNetV2_x0_33": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams", - "ShuffleNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams", - "ShuffleNetV2_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams", - "ShuffleNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams", - "ShuffleNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams", - "ShuffleNetV2_swish": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams" - } + "ShuffleNetV2_x0_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams", + "ShuffleNetV2_x0_33": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams", + "ShuffleNetV2_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams", + "ShuffleNetV2_x1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams", + "ShuffleNetV2_x1_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams", + "ShuffleNetV2_x2_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams", + "ShuffleNetV2_swish": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) @@ -207,10 +214,10 @@ class InvertedResidualDS(Layer): class ShuffleNet(Layer): - def __init__(self, class_dim=1000, scale=1.0, act="relu"): + def __init__(self, class_num=1000, scale=1.0, act="relu"): super(ShuffleNet, self).__init__() self.scale = scale - self.class_dim = class_dim + self.class_num = class_num stage_repeats = [4, 8, 4] if scale == 0.25: @@ -277,7 +284,7 @@ class ShuffleNet(Layer): # 5. fc self._fc = Linear( stage_out_channels[-1], - class_dim, + class_num, weight_attr=ParamAttr(name='fc6_weights'), bias_attr=ParamAttr(name='fc6_offset')) @@ -308,41 +315,48 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def ShuffleNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs): model = ShuffleNet(scale=0.25, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld) return model def ShuffleNetV2_x0_33(pretrained=False, use_ssld=False, **kwargs): model = ShuffleNet(scale=0.33, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld) return model def ShuffleNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs): model = ShuffleNet(scale=0.5, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld) return model def ShuffleNetV2_x1_0(pretrained=False, use_ssld=False, **kwargs): model = ShuffleNet(scale=1.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld) return model def ShuffleNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs): model = ShuffleNet(scale=1.5, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld) return model def ShuffleNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs): model = ShuffleNet(scale=2.0, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld) return model def ShuffleNetV2_swish(pretrained=False, use_ssld=False, **kwargs): model = ShuffleNet(scale=1.0, act="swish", **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/squeezenet.py b/ppcls/arch/backbone/model_zoo/squeezenet.py index a88a1bcff..647cd2ea7 100644 --- a/ppcls/arch/backbone/model_zoo/squeezenet.py +++ b/ppcls/arch/backbone/model_zoo/squeezenet.py @@ -22,9 +22,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "SqueezeNet1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams", - "SqueezeNet1_1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams", - } + "SqueezeNet1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams", + "SqueezeNet1_1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -78,7 +80,7 @@ class MakeFire(nn.Layer): class SqueezeNet(nn.Layer): - def __init__(self, version, class_dim=1000): + def __init__(self, version, class_num=1000): super(SqueezeNet, self).__init__() self.version = version @@ -125,7 +127,7 @@ class SqueezeNet(nn.Layer): self._drop = Dropout(p=0.5, mode="downscale_in_infer") self._conv9 = Conv2D( 512, - class_dim, + class_num, 1, weight_attr=ParamAttr(name="conv10_weights"), bias_attr=ParamAttr(name="conv10_offset")) @@ -164,6 +166,7 @@ class SqueezeNet(nn.Layer): x = paddle.squeeze(x, axis=[2, 3]) return x + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -175,15 +178,17 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def SqueezeNet1_0(pretrained=False, use_ssld=False, **kwargs): model = SqueezeNet(version="1.0", **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld) return model def SqueezeNet1_1(pretrained=False, use_ssld=False, **kwargs): model = SqueezeNet(version="1.1", **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/swin_transformer.py b/ppcls/arch/backbone/model_zoo/swin_transformer.py index a33bf5888..f4348fbae 100644 --- a/ppcls/arch/backbone/model_zoo/swin_transformer.py +++ b/ppcls/arch/backbone/model_zoo/swin_transformer.py @@ -24,13 +24,19 @@ from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPat from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "SwinTransformer_tiny_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams", - "SwinTransformer_small_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams", - "SwinTransformer_base_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams", - "SwinTransformer_base_patch4_window12_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams", - "SwinTransformer_large_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_pretrained.pdparams", - "SwinTransformer_large_patch4_window12_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_pretrained.pdparams", - } + "SwinTransformer_tiny_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams", + "SwinTransformer_small_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams", + "SwinTransformer_base_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams", + "SwinTransformer_base_patch4_window12_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams", + "SwinTransformer_large_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_pretrained.pdparams", + "SwinTransformer_large_patch4_window12_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_pretrained.pdparams", +} __all__ = list(MODEL_URLS.keys()) @@ -611,7 +617,7 @@ class SwinTransformer(nn.Layer): img_size=224, patch_size=4, in_chans=3, - class_dim=1000, + class_num=1000, embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], @@ -629,7 +635,7 @@ class SwinTransformer(nn.Layer): **kwargs): super(SwinTransformer, self).__init__() - self.num_classes = num_classes = class_dim + self.num_classes = num_classes = class_num self.num_layers = len(depths) self.embed_dim = embed_dim self.ape = ape @@ -729,7 +735,7 @@ class SwinTransformer(nn.Layer): flops += self.num_features * self.num_classes return flops - + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -743,7 +749,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): ) -def SwinTransformer_tiny_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs): +def SwinTransformer_tiny_patch4_window7_224(pretrained=False, + use_ssld=False, + **kwargs): model = SwinTransformer( embed_dim=96, depths=[2, 2, 6, 2], @@ -751,22 +759,34 @@ def SwinTransformer_tiny_patch4_window7_224(pretrained=False, use_ssld=False, ** window_size=7, drop_path_rate=0.2, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"], + use_ssld=use_ssld) return model -def SwinTransformer_small_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs): +def SwinTransformer_small_patch4_window7_224(pretrained=False, + use_ssld=False, + **kwargs): model = SwinTransformer( embed_dim=96, depths=[2, 2, 18, 2], num_heads=[3, 6, 12, 24], window_size=7, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_small_patch4_window7_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SwinTransformer_small_patch4_window7_224"], + use_ssld=use_ssld) return model -def SwinTransformer_base_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs): +def SwinTransformer_base_patch4_window7_224(pretrained=False, + use_ssld=False, + **kwargs): model = SwinTransformer( embed_dim=128, depths=[2, 2, 18, 2], @@ -774,11 +794,17 @@ def SwinTransformer_base_patch4_window7_224(pretrained=False, use_ssld=False, ** window_size=7, drop_path_rate=0.5, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_base_patch4_window7_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SwinTransformer_base_patch4_window7_224"], + use_ssld=use_ssld) return model -def SwinTransformer_base_patch4_window12_384(pretrained=False, use_ssld=False, **kwargs): +def SwinTransformer_base_patch4_window12_384(pretrained=False, + use_ssld=False, + **kwargs): model = SwinTransformer( img_size=384, embed_dim=128, @@ -787,22 +813,34 @@ def SwinTransformer_base_patch4_window12_384(pretrained=False, use_ssld=False, * window_size=12, drop_path_rate=0.5, # NOTE: do not appear in offical code **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_base_patch4_window12_384"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SwinTransformer_base_patch4_window12_384"], + use_ssld=use_ssld) return model -def SwinTransformer_large_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs): +def SwinTransformer_large_patch4_window7_224(pretrained=False, + use_ssld=False, + **kwargs): model = SwinTransformer( embed_dim=192, depths=[2, 2, 18, 2], num_heads=[6, 12, 24, 48], window_size=7, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_large_patch4_window7_224"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SwinTransformer_large_patch4_window7_224"], + use_ssld=use_ssld) return model -def SwinTransformer_large_patch4_window12_384(pretrained=False, use_ssld=False, **kwargs): +def SwinTransformer_large_patch4_window12_384(pretrained=False, + use_ssld=False, + **kwargs): model = SwinTransformer( img_size=384, embed_dim=192, @@ -810,5 +848,9 @@ def SwinTransformer_large_patch4_window12_384(pretrained=False, use_ssld=False, num_heads=[6, 12, 24, 48], window_size=12, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_large_patch4_window12_384"], use_ssld=use_ssld) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SwinTransformer_large_patch4_window12_384"], + use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/tnt.py b/ppcls/arch/backbone/model_zoo/tnt.py index 61f1083e4..9990584dc 100644 --- a/ppcls/arch/backbone/model_zoo/tnt.py +++ b/ppcls/arch/backbone/model_zoo/tnt.py @@ -23,16 +23,13 @@ from paddle.nn.initializer import TruncatedNormal, Constant from ppcls.arch.backbone.base.theseus_layer import Identity from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url - MODEL_URLS = { "TNT_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams" } - __all__ = MODEL_URLS.keys() - trunc_normal_ = TruncatedNormal(std=.02) zeros_ = Constant(value=0.) ones_ = Constant(value=1.) @@ -66,8 +63,12 @@ class DropPath(nn.Layer): class Mlp(nn.Layer): - def __init__(self, in_features, hidden_features=None, - out_features=None, act_layer=nn.GELU, drop=0.): + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.): super().__init__() out_features = out_features or in_features hidden_features = hidden_features or in_features @@ -86,14 +87,19 @@ class Mlp(nn.Layer): class Attention(nn.Layer): - def __init__(self, dim, hidden_dim, num_heads=8, - qkv_bias=False, attn_drop=0., proj_drop=0.): + def __init__(self, + dim, + hidden_dim, + num_heads=8, + qkv_bias=False, + attn_drop=0., + proj_drop=0.): super().__init__() self.hidden_dim = hidden_dim self.num_heads = num_heads head_dim = hidden_dim // num_heads self.head_dim = head_dim - self.scale = head_dim ** -0.5 + self.scale = head_dim**-0.5 self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias) self.v = nn.Linear(dim, dim, bias_attr=qkv_bias) @@ -103,73 +109,103 @@ class Attention(nn.Layer): def forward(self, x): B, N, C = x.shape - qk = self.qk(x).reshape((B, N, 2, self.num_heads, self.head_dim)).transpose((2, 0, 3, 1, 4)) + qk = self.qk(x).reshape( + (B, N, 2, self.num_heads, self.head_dim)).transpose( + (2, 0, 3, 1, 4)) q, k = qk[0], qk[1] - v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose((0, 2, 1, 3)) + v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose( + (0, 2, 1, 3)) - attn = (q @ k.transpose((0, 1, 3, 2))) * self.scale + attn = (q @k.transpose((0, 1, 3, 2))) * self.scale attn = nn.functional.softmax(attn, axis=-1) attn = self.attn_drop(attn) - x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B, N, -1)) + x = (attn @v).transpose((0, 2, 1, 3)).reshape((B, N, -1)) x = self.proj(x) x = self.proj_drop(x) return x class Block(nn.Layer): - def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4, mlp_ratio=4., - qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU, + def __init__(self, + dim, + in_dim, + num_pixel, + num_heads=12, + in_num_head=4, + mlp_ratio=4., + qkv_bias=False, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): super().__init__() # Inner transformer self.norm_in = norm_layer(in_dim) self.attn_in = Attention( - in_dim, in_dim, num_heads=in_num_head, - qkv_bias=qkv_bias, attn_drop=attn_drop, - proj_drop=drop - ) + in_dim, + in_dim, + num_heads=in_num_head, + qkv_bias=qkv_bias, + attn_drop=attn_drop, + proj_drop=drop) self.norm_mlp_in = norm_layer(in_dim) - self.mlp_in = Mlp( - in_features=in_dim, hidden_features=int(in_dim * 4), - out_features=in_dim, act_layer=act_layer, drop=drop - ) + self.mlp_in = Mlp(in_features=in_dim, + hidden_features=int(in_dim * 4), + out_features=in_dim, + act_layer=act_layer, + drop=drop) self.norm1_proj = norm_layer(in_dim) self.proj = nn.Linear(in_dim * num_pixel, dim) # Outer transformer self.norm_out = norm_layer(dim) self.attn_out = Attention( - dim, dim, num_heads=num_heads, qkv_bias=qkv_bias, - attn_drop=attn_drop, proj_drop=drop - ) + dim, + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + attn_drop=attn_drop, + proj_drop=drop) self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity() self.norm_mlp = norm_layer(dim) - self.mlp = Mlp( - in_features=dim, hidden_features=int(dim * mlp_ratio), - out_features=dim, act_layer=act_layer, drop=drop - ) + self.mlp = Mlp(in_features=dim, + hidden_features=int(dim * mlp_ratio), + out_features=dim, + act_layer=act_layer, + drop=drop) def forward(self, pixel_embed, patch_embed): # inner - pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed))) - pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed))) + pixel_embed = pixel_embed + self.drop_path( + self.attn_in(self.norm_in(pixel_embed))) + pixel_embed = pixel_embed + self.drop_path( + self.mlp_in(self.norm_mlp_in(pixel_embed))) # outer B, N, C = patch_embed.shape - patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1))) - patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed))) - patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed))) + patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj( + self.norm1_proj(pixel_embed).reshape((B, N - 1, -1))) + patch_embed = patch_embed + self.drop_path( + self.attn_out(self.norm_out(patch_embed))) + patch_embed = patch_embed + self.drop_path( + self.mlp(self.norm_mlp(patch_embed))) return pixel_embed, patch_embed class PixelEmbed(nn.Layer): - def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4): + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + in_dim=48, + stride=4): super().__init__() - num_patches = (img_size // patch_size) ** 2 + num_patches = (img_size // patch_size)**2 self.img_size = img_size self.num_patches = num_patches self.in_dim = in_dim @@ -177,10 +213,7 @@ class PixelEmbed(nn.Layer): self.new_patch_size = new_patch_size self.proj = nn.Conv2D( - in_chans, self.in_dim, - kernel_size=7, padding=3, - stride=stride - ) + in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride) def forward(self, x, pixel_pos): B, C, H, W = x.shape @@ -188,50 +221,64 @@ class PixelEmbed(nn.Layer): x = self.proj(x) x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size) - x = x.transpose((0, 2, 1)).reshape((B * self.num_patches, self.in_dim, self.new_patch_size, self.new_patch_size)) + x = x.transpose((0, 2, 1)).reshape( + (B * self.num_patches, self.in_dim, self.new_patch_size, + self.new_patch_size)) x = x + pixel_pos - x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose((0, 2, 1)) + x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose( + (0, 2, 1)) return x class TNT(nn.Layer): - def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, in_dim=48, depth=12, - num_heads=12, in_num_head=4, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0., - drop_path_rate=0., norm_layer=nn.LayerNorm, first_stride=4, class_dim=1000): + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + embed_dim=768, + in_dim=48, + depth=12, + num_heads=12, + in_num_head=4, + mlp_ratio=4., + qkv_bias=False, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + first_stride=4, + class_num=1000): super().__init__() - self.class_dim = class_dim + self.class_num = class_num # num_features for consistency with other models self.num_features = self.embed_dim = embed_dim self.pixel_embed = PixelEmbed( - img_size=img_size, patch_size=patch_size, - in_chans=in_chans, in_dim=in_dim, stride=first_stride - ) + img_size=img_size, + patch_size=patch_size, + in_chans=in_chans, + in_dim=in_dim, + stride=first_stride) num_patches = self.pixel_embed.num_patches self.num_patches = num_patches new_patch_size = self.pixel_embed.new_patch_size - num_pixel = new_patch_size ** 2 + num_pixel = new_patch_size**2 self.norm1_proj = norm_layer(num_pixel * in_dim) self.proj = nn.Linear(num_pixel * in_dim, embed_dim) self.norm2_proj = norm_layer(embed_dim) self.cls_token = self.create_parameter( - shape=(1, 1, embed_dim), - default_initializer=zeros_ - ) + shape=(1, 1, embed_dim), default_initializer=zeros_) self.add_parameter("cls_token", self.cls_token) self.patch_pos = self.create_parameter( - shape=(1, num_patches + 1, embed_dim), - default_initializer=zeros_ - ) + shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_) self.add_parameter("patch_pos", self.patch_pos) self.pixel_pos = self.create_parameter( - shape=(1, in_dim, new_patch_size, new_patch_size), - default_initializer=zeros_ - ) + shape=(1, in_dim, new_patch_size, new_patch_size), + default_initializer=zeros_) self.add_parameter("pixel_pos", self.pixel_pos) self.pos_drop = nn.Dropout(p=drop_rate) @@ -241,17 +288,24 @@ class TNT(nn.Layer): blocks = [] for i in range(depth): - blocks.append(Block( - dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel, num_heads=num_heads, - in_num_head=in_num_head, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], - norm_layer=norm_layer - )) + blocks.append( + Block( + dim=embed_dim, + in_dim=in_dim, + num_pixel=num_pixel, + num_heads=num_heads, + in_num_head=in_num_head, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i], + norm_layer=norm_layer)) self.blocks = nn.LayerList(blocks) self.norm = norm_layer(embed_dim) - if class_dim > 0: - self.head = nn.Linear(embed_dim, class_dim) + if class_num > 0: + self.head = nn.Linear(embed_dim, class_num) trunc_normal_(self.cls_token) trunc_normal_(self.patch_pos) @@ -271,8 +325,12 @@ class TNT(nn.Layer): B = x.shape[0] pixel_embed = self.pixel_embed(x, self.pixel_pos) - patch_embed = self.norm2_proj(self.proj(self.norm1_proj(pixel_embed.reshape((B, self.num_patches, -1))))) - patch_embed = paddle.concat((self.cls_token.expand((B, -1, -1)), patch_embed), axis=1) + patch_embed = self.norm2_proj( + self.proj( + self.norm1_proj( + pixel_embed.reshape((B, self.num_patches, -1))))) + patch_embed = paddle.concat( + (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1) patch_embed = patch_embed + self.patch_pos patch_embed = self.pos_drop(patch_embed) @@ -285,7 +343,7 @@ class TNT(nn.Layer): def forward(self, x): x = self.forward_features(x) - if self.class_dim > 0: + if self.class_num > 0: x = self.head(x) return x @@ -304,15 +362,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def TNT_small(pretrained=False, **kwargs): - model = TNT( - patch_size=16, - embed_dim=384, - in_dim=24, - depth=12, - num_heads=6, - in_num_head=4, - qkv_bias=False, - **kwargs - ) + model = TNT(patch_size=16, + embed_dim=384, + in_dim=24, + depth=12, + num_heads=6, + in_num_head=4, + qkv_bias=False, + **kwargs) _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"]) return model diff --git a/ppcls/arch/backbone/model_zoo/vision_transformer.py b/ppcls/arch/backbone/model_zoo/vision_transformer.py index ebe77684e..75d767be4 100644 --- a/ppcls/arch/backbone/model_zoo/vision_transformer.py +++ b/ppcls/arch/backbone/model_zoo/vision_transformer.py @@ -231,7 +231,7 @@ class VisionTransformer(nn.Layer): img_size=224, patch_size=16, in_chans=3, - class_dim=1000, + class_num=1000, embed_dim=768, depth=12, num_heads=12, @@ -245,7 +245,7 @@ class VisionTransformer(nn.Layer): epsilon=1e-5, **args): super().__init__() - self.class_dim = class_dim + self.class_num = class_num self.num_features = self.embed_dim = embed_dim @@ -284,7 +284,7 @@ class VisionTransformer(nn.Layer): # Classifier head self.head = nn.Linear(embed_dim, - class_dim) if class_dim > 0 else Identity() + class_num) if class_num > 0 else Identity() trunc_normal_(self.pos_embed) trunc_normal_(self.cls_token) diff --git a/ppcls/arch/backbone/model_zoo/xception.py b/ppcls/arch/backbone/model_zoo/xception.py index 126c3dfdb..2b843788b 100644 --- a/ppcls/arch/backbone/model_zoo/xception.py +++ b/ppcls/arch/backbone/model_zoo/xception.py @@ -8,14 +8,16 @@ from paddle.nn.initializer import Uniform import math import sys - from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url MODEL_URLS = { - "Xception41": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams", - "Xception65": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams", - "Xception71": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams" - } + "Xception41": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams", + "Xception65": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams", + "Xception71": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) @@ -290,7 +292,7 @@ class ExitFlowBottleneckBlock(nn.Layer): class ExitFlow(nn.Layer): - def __init__(self, class_dim): + def __init__(self, class_num): super(ExitFlow, self).__init__() name = "exit_flow" @@ -303,7 +305,7 @@ class ExitFlow(nn.Layer): stdv = 1.0 / math.sqrt(2048 * 1.0) self._out = Linear( 2048, - class_dim, + class_num, weight_attr=ParamAttr( name="fc_weights", initializer=Uniform(-stdv, stdv)), bias_attr=ParamAttr(name="fc_offset")) @@ -324,13 +326,13 @@ class Xception(nn.Layer): def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8, - class_dim=1000): + class_num=1000): super(Xception, self).__init__() self.entry_flow_block_num = entry_flow_block_num self.middle_flow_block_num = middle_flow_block_num self._entry_flow = EntryFlow(entry_flow_block_num) self._middle_flow = MiddleFlow(middle_flow_block_num) - self._exit_flow = ExitFlow(class_dim) + self._exit_flow = ExitFlow(class_num) def forward(self, inputs): x = self._entry_flow(inputs) @@ -338,6 +340,7 @@ class Xception(nn.Layer): x = self._exit_flow(x) return x + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -349,21 +352,26 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): raise RuntimeError( "pretrained type is not available. Please use `string` or `boolean` type." ) - + def Xception41(pretrained=False, use_ssld=False, **kwargs): model = Xception(entry_flow_block_num=3, middle_flow_block_num=8, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld) return model def Xception65(pretrained=False, use_ssld=False, **kwargs): - model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld) + model = Xception( + entry_flow_block_num=3, middle_flow_block_num=16, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld) return model def Xception71(pretrained=False, use_ssld=False, **kwargs): - model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld) + model = Xception( + entry_flow_block_num=5, middle_flow_block_num=16, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld) return model diff --git a/ppcls/arch/backbone/model_zoo/xception_deeplab.py b/ppcls/arch/backbone/model_zoo/xception_deeplab.py index dc8c234ec..c52769b37 100644 --- a/ppcls/arch/backbone/model_zoo/xception_deeplab.py +++ b/ppcls/arch/backbone/model_zoo/xception_deeplab.py @@ -21,8 +21,12 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url -MODEL_URLS = {"Xception41_deeplab": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams", - "Xception65_deeplab": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams"} +MODEL_URLS = { + "Xception41_deeplab": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams", + "Xception65_deeplab": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams" +} __all__ = list(MODEL_URLS.keys()) @@ -268,7 +272,7 @@ class Xception_Block(nn.Layer): class XceptionDeeplab(nn.Layer): - def __init__(self, backbone, class_dim=1000): + def __init__(self, backbone, class_num=1000): super(XceptionDeeplab, self).__init__() bottleneck_params = gen_bottleneck_params(backbone) @@ -370,7 +374,7 @@ class XceptionDeeplab(nn.Layer): self._pool = AdaptiveAvgPool2D(1) self._fc = Linear( self.chns[1][-1], - class_dim, + class_num, weight_attr=ParamAttr(name="fc_weights"), bias_attr=ParamAttr(name="fc_bias")) @@ -388,8 +392,8 @@ class XceptionDeeplab(nn.Layer): x = paddle.squeeze(x, axis=[2, 3]) x = self._fc(x) return x - - + + def _load_pretrained(pretrained, model, model_url, use_ssld=False): if pretrained is False: pass @@ -405,11 +409,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): def Xception41_deeplab(pretrained=False, use_ssld=False, **kwargs): model = XceptionDeeplab('xception_41', **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld) return model def Xception65_deeplab(pretrained=False, use_ssld=False, **kwargs): model = XceptionDeeplab("xception_65", **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld) return model