Merge pull request #987 from littletomatodonkey/dev/fix_dev_dim

fix class_dim: rename the `class_dim` argument to `class_num` in backbone models (plus code-formatting cleanups)
2021-06-29 18:20:01 +08:00 · 2021-06-29 18:20:01 +08:00 · e744db271b
parent b414737545 4d496f787b
commit e744db271b
36 changed files with 1332 additions and 761 deletions
--- a/ppcls/arch/backbone/model_zoo/alexnet.py
+++ b/ppcls/arch/backbone/model_zoo/alexnet.py
@ -23,10 +23,14 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"AlexNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"}
+MODEL_URLS = {
+    "AlexNet":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

+
 class ConvPoolLayer(nn.Layer):
    def __init__(self,
                 input_channels,
@ -64,7 +68,7 @@ class ConvPoolLayer(nn.Layer):


 class AlexNetDY(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
        super(AlexNetDY, self).__init__()

        stdv = 1.0 / math.sqrt(3 * 11 * 11)
@ -119,7 +123,7 @@ class AlexNetDY(nn.Layer):
                name="fc7_offset", initializer=Uniform(-stdv, stdv)))
        self._fc8 = Linear(
            in_features=4096,
-            out_features=class_dim,
+            out_features=class_num,
            weight_attr=ParamAttr(
                name="fc8_weights", initializer=Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(
@ -143,6 +147,7 @@ class AlexNetDY(nn.Layer):
        x = self._fc8(x)
        return x

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -155,7 +160,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
            "pretrained type is not available. Please use `string` or `boolean` type."
        )

+
 def AlexNet(pretrained=False, use_ssld=False, **kwargs):
    model = AlexNetDY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/darknet.py
+++ b/ppcls/arch/backbone/model_zoo/darknet.py
@ -23,10 +23,14 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"DarkNet53": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"}
+MODEL_URLS = {
+    "DarkNet53":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

+
 class ConvBNLayer(nn.Layer):
    def __init__(self,
                 input_channels,
@ -77,7 +81,7 @@ class BasicBlock(nn.Layer):


 class DarkNet(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
        super(DarkNet, self).__init__()

        self.stages = [1, 2, 8, 8, 4]
@ -126,7 +130,7 @@ class DarkNet(nn.Layer):
        stdv = 1.0 / math.sqrt(1024.0)
        self._out = Linear(
            1024,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                name="fc_weights", initializer=Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(name="fc_offset"))
@ -172,6 +176,7 @@ class DarkNet(nn.Layer):
        x = self._out(x)
        return x

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -184,7 +189,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
            "pretrained type is not available. Please use `string` or `boolean` type."
        )

+
 def DarkNet53(pretrained=False, use_ssld=False, **kwargs):
    model = DarkNet(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/densenet.py
+++ b/ppcls/arch/backbone/model_zoo/densenet.py
@ -28,12 +28,18 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"DenseNet121": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
-              "DenseNet161": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
-              "DenseNet169": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
-              "DenseNet201": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
-              "DenseNet264": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "DenseNet121":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
+    "DenseNet161":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
+    "DenseNet169":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
+    "DenseNet201":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
+    "DenseNet264":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -196,7 +202,7 @@ class ConvBNLayer(nn.Layer):


 class DenseNet(nn.Layer):
-    def __init__(self, layers=60, bn_size=4, dropout=0, class_dim=1000):
+    def __init__(self, layers=60, bn_size=4, dropout=0, class_num=1000):
        super(DenseNet, self).__init__()

        supported_layers = [121, 161, 169, 201, 264]
@ -269,7 +275,7 @@ class DenseNet(nn.Layer):

        self.out = Linear(
            num_features,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))
@ -289,6 +295,7 @@ class DenseNet(nn.Layer):
        y = self.out(y)
        return y

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -301,31 +308,37 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
            "pretrained type is not available. Please use `string` or `boolean` type."
        )

+
 def DenseNet121(pretrained=False, use_ssld=False, **kwargs):
    model = DenseNet(layers=121, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
    return model


 def DenseNet161(pretrained=False, use_ssld=False, **kwargs):
    model = DenseNet(layers=161, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
    return model


 def DenseNet169(pretrained=False, use_ssld=False, **kwargs):
    model = DenseNet(layers=169, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
    return model


 def DenseNet201(pretrained=False, use_ssld=False, **kwargs):
    model = DenseNet(layers=201, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
    return model


 def DenseNet264(pretrained=False, use_ssld=False, **kwargs):
    model = DenseNet(layers=264, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
@ -19,15 +19,23 @@ from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zero
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "DeiT_tiny_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
-              "DeiT_small_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
-              "DeiT_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
-              "DeiT_tiny_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
-              "DeiT_small_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
-              "DeiT_base_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams", 
-              "DeiT_base_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
-              "DeiT_base_distilled_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
-             }
+    "DeiT_tiny_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
+    "DeiT_small_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
+    "DeiT_base_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
+    "DeiT_tiny_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_small_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_base_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_base_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
+    "DeiT_base_distilled_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -36,7 +44,7 @@ class DistilledVisionTransformer(VisionTransformer):
    def __init__(self,
                 img_size=224,
                 patch_size=16,
-                 class_dim=1000,
+                 class_num=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
@ -48,7 +56,7 @@ class DistilledVisionTransformer(VisionTransformer):
        super().__init__(
            img_size=img_size,
            patch_size=patch_size,
-            class_dim=class_dim,
+            class_num=class_num,
            embed_dim=embed_dim,
            depth=depth,
            num_heads=num_heads,
@ -68,7 +76,7 @@ class DistilledVisionTransformer(VisionTransformer):

        self.head_dist = nn.Linear(
            self.embed_dim,
-            self.class_dim) if self.class_dim > 0 else Identity()
+            self.class_num) if self.class_num > 0 else Identity()

        trunc_normal_(self.dist_token)
        trunc_normal_(self.pos_embed)
@ -121,7 +129,11 @@ def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_tiny_patch16_224"],
+        use_ssld=use_ssld)
    return model


@ -135,7 +147,11 @@ def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_small_patch16_224"],
+        use_ssld=use_ssld)
    return model


@ -149,11 +165,16 @@ def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_patch16_224"],
+        use_ssld=use_ssld)
    return model


-def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False,
+                                    **kwargs):
    model = DistilledVisionTransformer(
        patch_size=16,
        embed_dim=192,
@ -163,11 +184,17 @@ def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_distilled_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_tiny_distilled_patch16_224"],
+        use_ssld=use_ssld)
    return model


-def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_small_distilled_patch16_224(pretrained=False,
+                                     use_ssld=False,
+                                     **kwargs):
    model = DistilledVisionTransformer(
        patch_size=16,
        embed_dim=384,
@ -177,11 +204,16 @@ def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs)
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_distilled_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_small_distilled_patch16_224"],
+        use_ssld=use_ssld)
    return model


-def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False,
+                                    **kwargs):
    model = DistilledVisionTransformer(
        patch_size=16,
        embed_dim=768,
@ -191,7 +223,11 @@ def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_distilled_patch16_224"],
+        use_ssld=use_ssld)
    return model


@ -206,11 +242,16 @@ def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_patch16_384"],
+        use_ssld=use_ssld)
    return model


-def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False,
+                                    **kwargs):
    model = DistilledVisionTransformer(
        img_size=384,
        patch_size=16,
@ -221,5 +262,9 @@ def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs):
        qkv_bias=True,
        epsilon=1e-6,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_distilled_patch16_384"],
+        use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/dla.py
+++ b/ppcls/arch/backbone/model_zoo/dla.py
@ -23,7 +23,6 @@ from paddle.nn.initializer import Normal, Constant
 from ppcls.arch.backbone.base.theseus_layer import Identity
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-
 MODEL_URLS = {
    "DLA34":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams",
@ -47,10 +46,8 @@ MODEL_URLS = {
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams"
 }

-
 __all__ = MODEL_URLS.keys()

-
 zeros_ = Constant(value=0.)
 ones_ = Constant(value=1.)

@ -59,15 +56,23 @@ class DlaBasic(nn.Layer):
    def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
        super(DlaBasic, self).__init__()
        self.conv1 = nn.Conv2D(
-            inplanes, planes, kernel_size=3, stride=stride,
-            padding=dilation, bias_attr=False, dilation=dilation
-        )
+            inplanes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation)
        self.bn1 = nn.BatchNorm2D(planes)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2D(
-            planes, planes, kernel_size=3, stride=1,
-            padding=dilation, bias_attr=False, dilation=dilation
-        )
+            planes,
+            planes,
+            kernel_size=3,
+            stride=1,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation)
        self.bn2 = nn.BatchNorm2D(planes)
        self.stride = stride

@ -91,23 +96,34 @@ class DlaBasic(nn.Layer):
 class DlaBottleneck(nn.Layer):
    expansion = 2

-    def __init__(self, inplanes, outplanes, stride=1,
-                 dilation=1, cardinality=1, base_width=64):
+    def __init__(self,
+                 inplanes,
+                 outplanes,
+                 stride=1,
+                 dilation=1,
+                 cardinality=1,
+                 base_width=64):
        super(DlaBottleneck, self).__init__()
        self.stride = stride
-        mid_planes = int(math.floor(
-            outplanes * (base_width / 64)) * cardinality)
+        mid_planes = int(
+            math.floor(outplanes * (base_width / 64)) * cardinality)
        mid_planes = mid_planes // self.expansion

-        self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
+        self.conv1 = nn.Conv2D(
+            inplanes, mid_planes, kernel_size=1, bias_attr=False)
        self.bn1 = nn.BatchNorm2D(mid_planes)
        self.conv2 = nn.Conv2D(
-            mid_planes, mid_planes, kernel_size=3, 
-            stride=stride, padding=dilation, bias_attr=False, 
-            dilation=dilation, groups=cardinality
-        )
+            mid_planes,
+            mid_planes,
+            kernel_size=3,
+            stride=stride,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation,
+            groups=cardinality)
        self.bn2 = nn.BatchNorm2D(mid_planes)
-        self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
+        self.conv3 = nn.Conv2D(
+            mid_planes, outplanes, kernel_size=1, bias_attr=False)
        self.bn3 = nn.BatchNorm2D(outplanes)
        self.relu = nn.ReLU()

@ -136,9 +152,12 @@ class DlaRoot(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, residual):
        super(DlaRoot, self).__init__()
        self.conv = nn.Conv2D(
-            in_channels, out_channels, 1, stride=1, 
-            bias_attr=False, padding=(kernel_size - 1) // 2
-        )
+            in_channels,
+            out_channels,
+            1,
+            stride=1,
+            bias_attr=False,
+            padding=(kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2D(out_channels)
        self.relu = nn.ReLU()
        self.residual = residual
@ -155,9 +174,18 @@ class DlaRoot(nn.Layer):


 class DlaTree(nn.Layer):
-    def __init__(self, levels, block, in_channels, out_channels, 
-                 stride=1,dilation=1, cardinality=1, base_width=64,
-                 level_root=False, root_dim=0, root_kernel_size=1, 
+    def __init__(self,
+                 levels,
+                 block,
+                 in_channels,
+                 out_channels,
+                 stride=1,
+                 dilation=1,
+                 cardinality=1,
+                 base_width=64,
+                 level_root=False,
+                 root_dim=0,
+                 root_kernel_size=1,
                 root_residual=False):
        super(DlaTree, self).__init__()
        if root_dim == 0:
@ -168,28 +196,45 @@ class DlaTree(nn.Layer):
        self.downsample = nn.MaxPool2D(
            stride, stride=stride) if stride > 1 else Identity()
        self.project = Identity()
-        cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)
+        cargs = dict(
+            dilation=dilation, cardinality=cardinality, base_width=base_width)

        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride, **cargs)
            self.tree2 = block(out_channels, out_channels, 1, **cargs)
            if in_channels != out_channels:
                self.project = nn.Sequential(
-                    nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
+                    nn.Conv2D(
+                        in_channels,
+                        out_channels,
+                        kernel_size=1,
+                        stride=1,
+                        bias_attr=False),
                    nn.BatchNorm2D(out_channels))
        else:
-            cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
+            cargs.update(
+                dict(
+                    root_kernel_size=root_kernel_size,
+                    root_residual=root_residual))
            self.tree1 = DlaTree(
-                levels - 1, block, in_channels, 
-                out_channels, stride, root_dim=0, **cargs
-            )
+                levels - 1,
+                block,
+                in_channels,
+                out_channels,
+                stride,
+                root_dim=0,
+                **cargs)
            self.tree2 = DlaTree(
-                levels - 1, block, out_channels, 
-                out_channels, root_dim=root_dim + out_channels, **cargs
-            )
+                levels - 1,
+                block,
+                out_channels,
+                out_channels,
+                root_dim=root_dim + out_channels,
+                **cargs)

        if levels == 1:
-            self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)
+            self.root = DlaRoot(root_dim, out_channels, root_kernel_size,
+                                root_residual)

        self.level_root = level_root
        self.root_dim = root_dim
@ -214,12 +259,20 @@ class DlaTree(nn.Layer):


 class DLA(nn.Layer):
-    def __init__(self, levels, channels, in_chans=3, cardinality=1,
-                 base_width=64, block=DlaBottleneck, residual_root=False,
-                 drop_rate=0.0, class_dim=1000, with_pool=True):
+    def __init__(self,
+                 levels,
+                 channels,
+                 in_chans=3,
+                 cardinality=1,
+                 base_width=64,
+                 block=DlaBottleneck,
+                 residual_root=False,
+                 drop_rate=0.0,
+                 class_num=1000,
+                 with_pool=True):
        super(DLA, self).__init__()
        self.channels = channels
-        self.class_dim = class_dim
+        self.class_num = class_num
        self.with_pool = with_pool
        self.cardinality = cardinality
        self.base_width = base_width
@ -227,46 +280,72 @@ class DLA(nn.Layer):

        self.base_layer = nn.Sequential(
            nn.Conv2D(
-                in_chans, channels[0], kernel_size=7,
-                stride=1, padding=3, bias_attr=False
-            ),
+                in_chans,
+                channels[0],
+                kernel_size=7,
+                stride=1,
+                padding=3,
+                bias_attr=False),
            nn.BatchNorm2D(channels[0]),
            nn.ReLU())

-        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
-        self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)
+        self.level0 = self._make_conv_level(channels[0], channels[0],
+                                            levels[0])
+        self.level1 = self._make_conv_level(
+            channels[0], channels[1], levels[1], stride=2)

        cargs = dict(
            cardinality=cardinality,
            base_width=base_width,
-            root_residual=residual_root
-        )
+            root_residual=residual_root)

        self.level2 = DlaTree(
-            levels[2], block, channels[1], 
-            channels[2], 2, level_root=False, **cargs
-        )
+            levels[2],
+            block,
+            channels[1],
+            channels[2],
+            2,
+            level_root=False,
+            **cargs)
        self.level3 = DlaTree(
-            levels[3], block, channels[2], 
-            channels[3], 2, level_root=True, **cargs
-        )
+            levels[3],
+            block,
+            channels[2],
+            channels[3],
+            2,
+            level_root=True,
+            **cargs)
        self.level4 = DlaTree(
-            levels[4], block, channels[3], 
-            channels[4], 2, level_root=True, **cargs
-        )
+            levels[4],
+            block,
+            channels[3],
+            channels[4],
+            2,
+            level_root=True,
+            **cargs)
        self.level5 = DlaTree(
-            levels[5], block, channels[4], 
-            channels[5], 2, level_root=True, **cargs
-        )
+            levels[5],
+            block,
+            channels[4],
+            channels[5],
+            2,
+            level_root=True,
+            **cargs)

        self.feature_info = [
            # rare to have a meaningful stride 1 level
-            dict(num_chs=channels[0], reduction=1, module='level0'),
-            dict(num_chs=channels[1], reduction=2, module='level1'),
-            dict(num_chs=channels[2], reduction=4, module='level2'),
-            dict(num_chs=channels[3], reduction=8, module='level3'),
-            dict(num_chs=channels[4], reduction=16, module='level4'),
-            dict(num_chs=channels[5], reduction=32, module='level5'),
+            dict(
+                num_chs=channels[0], reduction=1, module='level0'),
+            dict(
+                num_chs=channels[1], reduction=2, module='level1'),
+            dict(
+                num_chs=channels[2], reduction=4, module='level2'),
+            dict(
+                num_chs=channels[3], reduction=8, module='level3'),
+            dict(
+                num_chs=channels[4], reduction=16, module='level4'),
+            dict(
+                num_chs=channels[5], reduction=32, module='level5'),
        ]

        self.num_features = channels[-1]
@ -274,8 +353,8 @@ class DLA(nn.Layer):
        if with_pool:
            self.global_pool = nn.AdaptiveAvgPool2D(1)

-        if class_dim > 0:
-            self.fc = nn.Conv2D(self.num_features, class_dim, 1)
+        if class_num > 0:
+            self.fc = nn.Conv2D(self.num_features, class_num, 1)

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
@ -291,12 +370,14 @@ class DLA(nn.Layer):
        for i in range(convs):
            modules.extend([
                nn.Conv2D(
-                    inplanes, planes, kernel_size=3, 
+                    inplanes,
+                    planes,
+                    kernel_size=3,
                    stride=stride if i == 0 else 1,
-                    padding=dilation, bias_attr=False, dilation=dilation
-                ),
-                nn.BatchNorm2D(planes),
-                nn.ReLU()])
+                    padding=dilation,
+                    bias_attr=False,
+                    dilation=dilation), nn.BatchNorm2D(planes), nn.ReLU()
+            ])
            inplanes = planes
        return nn.Sequential(*modules)

@ -321,7 +402,7 @@ class DLA(nn.Layer):
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)

-        if self.class_dim > 0:
+        if self.class_num > 0:
            x = self.fc(x)
            x = x.flatten(1)

@ -342,124 +423,104 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):


 def DLA34(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 2, 1),
-        channels=(16, 32, 64, 128, 256, 512),
-        block=DlaBasic,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 128, 256, 512),
+                block=DlaBasic,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
    return model


 def DLA46_c(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 2, 1),
-        channels=(16, 32, 64, 64, 128, 256),
-        block=DlaBottleneck,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
    return model


 def DLA46x_c(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 2, 1),
-        channels=(16, 32, 64, 64, 128, 256),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
    return model


 def DLA60(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 3, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
    return model


 def DLA60x(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 3, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
    return model


 def DLA60x_c(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 3, 1),
-        channels=(16, 32, 64, 64, 128, 256),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
    return model


 def DLA102(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 3, 4, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                residual_root=True,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
    return model


 def DLA102x(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 3, 4, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                residual_root=True,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
    return model


 def DLA102x2(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 3, 4, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        cardinality=64,
-        base_width=4,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=64,
+                base_width=4,
+                residual_root=True,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
    return model


 def DLA169(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 2, 3, 5, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 2, 3, 5, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                residual_root=True,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
    return model
--- a/ppcls/arch/backbone/model_zoo/dpn.py
+++ b/ppcls/arch/backbone/model_zoo/dpn.py
@ -29,12 +29,18 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"DPN68": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
-              "DPN92": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
-              "DPN98": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
-              "DPN107": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
-              "DPN131": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "DPN68":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
+    "DPN92":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
+    "DPN98":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
+    "DPN107":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
+    "DPN131":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -211,10 +217,10 @@ class DualPathFactory(nn.Layer):


 class DPN(nn.Layer):
-    def __init__(self, layers=68, class_dim=1000):
+    def __init__(self, layers=68, class_num=1000):
        super(DPN, self).__init__()

-        self._class_dim = class_dim
+        self._class_num = class_num

        args = self.get_net_args(layers)
        bws = args['bw']
@ -309,7 +315,7 @@ class DPN(nn.Layer):

        self.out = Linear(
            out_channel,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))
@ -401,6 +407,7 @@ class DPN(nn.Layer):

        return net_arg

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
--- a/ppcls/arch/backbone/model_zoo/efficientnet.py
+++ b/ppcls/arch/backbone/model_zoo/efficientnet.py
@ -11,16 +11,26 @@ import copy

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"EfficientNetB0_small":  "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
-              "EfficientNetB0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
-              "EfficientNetB1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
-              "EfficientNetB2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
-              "EfficientNetB3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
-              "EfficientNetB4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
-              "EfficientNetB5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
-              "EfficientNetB6": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
-              "EfficientNetB7": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "EfficientNetB0_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
+    "EfficientNetB0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
+    "EfficientNetB1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
+    "EfficientNetB2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
+    "EfficientNetB3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
+    "EfficientNetB4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
+    "EfficientNetB5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
+    "EfficientNetB6":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
+    "EfficientNetB7":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -725,7 +735,7 @@ class EfficientNet(nn.Layer):
                 padding_type="SAME",
                 override_params=None,
                 use_se=True,
-                 class_dim=1000):
+                 class_num=1000):
        super(EfficientNet, self).__init__()

        model_name = 'efficientnet-' + name
@ -778,7 +788,7 @@ class EfficientNet(nn.Layer):
        param_attr, bias_attr = init_fc_layer("_fc")
        self._fc = Linear(
            output_channels,
-            class_dim,
+            class_num,
            weight_attr=param_attr,
            bias_attr=bias_attr)

--- a/ppcls/arch/backbone/model_zoo/ghostnet.py
+++ b/ppcls/arch/backbone/model_zoo/ghostnet.py
@ -23,10 +23,14 @@ from paddle.nn.initializer import Uniform, KaimingNormal

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"GhostNet_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
-              "GhostNet_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
-              "GhostNet_x1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "GhostNet_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
+    "GhostNet_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
+    "GhostNet_x1_3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -215,7 +219,7 @@ class GhostBottleneck(nn.Layer):


 class GhostNet(nn.Layer):
-    def __init__(self, scale, class_dim=1000):
+    def __init__(self, scale, class_num=1000):
        super(GhostNet, self).__init__()
        self.cfgs = [
            # k, t, c, SE, s
@ -290,7 +294,7 @@ class GhostNet(nn.Layer):
        stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0)
        self.fc_1 = Linear(
            self._fc0_output_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                name="fc_1_weights", initializer=Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(name="fc_1_offset"))
@ -338,17 +342,20 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
    model = GhostNet(scale=0.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
    return model


 def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
    model = GhostNet(scale=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
    return model


 def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs):
    model = GhostNet(scale=1.3, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/googlenet.py
+++ b/ppcls/arch/backbone/model_zoo/googlenet.py
@ -10,8 +10,10 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"GoogLeNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "GoogLeNet":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -101,7 +103,7 @@ class Inception(nn.Layer):


 class GoogLeNetDY(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
        super(GoogLeNetDY, self).__init__()
        self._conv = ConvLayer(3, 64, 7, 2, name="conv1")
        self._pool = MaxPool2D(kernel_size=3, stride=2)
@ -134,7 +136,7 @@ class GoogLeNetDY(nn.Layer):
        self._drop = Dropout(p=0.4, mode="downscale_in_infer")
        self._fc_out = Linear(
            1024,
-            class_dim,
+            class_num,
            weight_attr=xavier(1024, 1, "out"),
            bias_attr=ParamAttr(name="out_offset"))
        self._pool_o1 = AvgPool2D(kernel_size=5, stride=3)
@ -147,7 +149,7 @@ class GoogLeNetDY(nn.Layer):
        self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer")
        self._out1 = Linear(
            1024,
-            class_dim,
+            class_num,
            weight_attr=xavier(1024, 1, "out1"),
            bias_attr=ParamAttr(name="out1_offset"))
        self._pool_o2 = AvgPool2D(kernel_size=5, stride=3)
@ -160,7 +162,7 @@ class GoogLeNetDY(nn.Layer):
        self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer")
        self._out2 = Linear(
            1024,
-            class_dim,
+            class_num,
            weight_attr=xavier(1024, 1, "out2"),
            bias_attr=ParamAttr(name="out2_offset"))

@ -222,5 +224,6 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def GoogLeNet(pretrained=False, use_ssld=False, **kwargs):
    model = GoogLeNetDY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/gvt.py
+++ b/ppcls/arch/backbone/model_zoo/gvt.py
@ -25,18 +25,23 @@ from .vision_transformer import Block as ViTBlock
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "pcpvt_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams",
-              "pcpvt_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams",
-              "pcpvt_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams",
-              "alt_gvt_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams",
-              "alt_gvt_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams",
-              "alt_gvt_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams"
-             }
+    "pcpvt_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams",
+    "pcpvt_base":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams",
+    "pcpvt_large":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams",
+    "alt_gvt_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams",
+    "alt_gvt_base":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams",
+    "alt_gvt_large":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())


-
 class GroupAttention(nn.Layer):
    """LSA: self attention within a group.
    """
@ -522,7 +527,7 @@ class ALTGVT(PCPVT):
                 img_size=224,
                 patch_size=4,
                 in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                 embed_dims=[64, 128, 256],
                 num_heads=[1, 2, 4],
                 mlp_ratios=[4, 4, 4],
@ -536,7 +541,7 @@ class ALTGVT(PCPVT):
                 sr_ratios=[4, 2, 1],
                 block_cls=GroupBlock,
                 wss=[7, 7, 7]):
-        super().__init__(img_size, patch_size, in_chans, class_dim, embed_dims,
+        super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
                         num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
                         attn_drop_rate, drop_path_rate, norm_layer, depths,
                         sr_ratios, block_cls)
@ -568,6 +573,7 @@ class ALTGVT(PCPVT):
            cur += depths[k]
        self.apply(self._init_weights)

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -593,7 +599,8 @@ def pcpvt_small(pretrained=False, use_ssld=False, **kwargs):
        depths=[3, 4, 6, 3],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld)
    return model


@ -609,7 +616,8 @@ def pcpvt_base(pretrained=False, use_ssld=False, **kwargs):
        depths=[3, 4, 18, 3],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld)
    return model


@ -625,7 +633,8 @@ def pcpvt_large(pretrained=False, use_ssld=False, **kwargs):
        depths=[3, 8, 27, 3],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld)
    return model


@ -642,7 +651,8 @@ def alt_gvt_small(pretrained=False, use_ssld=False, **kwargs):
        wss=[7, 7, 7, 7],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld)
    return model


@ -659,7 +669,8 @@ def alt_gvt_base(pretrained=False, use_ssld=False, **kwargs):
        wss=[7, 7, 7, 7],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld)
    return model


@ -676,5 +687,6 @@ def alt_gvt_large(pretrained=False, use_ssld=False, **kwargs):
        wss=[7, 7, 7, 7],
        sr_ratios=[8, 4, 2, 1],
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/hardnet.py
+++ b/ppcls/arch/backbone/model_zoo/hardnet.py
@ -17,7 +17,6 @@ import paddle.nn as nn

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-
 MODEL_URLS = {
    'HarDNet39_ds':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams',
@ -29,51 +28,70 @@ MODEL_URLS = {
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams'
 }

-
 __all__ = MODEL_URLS.keys()


-def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
+def ConvLayer(in_channels,
+              out_channels,
+              kernel_size=3,
+              stride=1,
+              bias_attr=False):
    layer = nn.Sequential(
        ('conv', nn.Conv2D(
-            in_channels, out_channels, kernel_size=kernel_size,
-            stride=stride, padding=kernel_size//2, groups=1, bias_attr=bias_attr
-        )),
-        ('norm', nn.BatchNorm2D(out_channels)),
-        ('relu', nn.ReLU6())
-    )
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=kernel_size // 2,
+            groups=1,
+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)),
+        ('relu', nn.ReLU6()))
    return layer


-def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
+def DWConvLayer(in_channels,
+                out_channels,
+                kernel_size=3,
+                stride=1,
+                bias_attr=False):
    layer = nn.Sequential(
        ('dwconv', nn.Conv2D(
-            in_channels, out_channels, kernel_size=kernel_size,
-            stride=stride, padding=1, groups=out_channels, bias_attr=bias_attr
-        )),
-        ('norm', nn.BatchNorm2D(out_channels))
-    )
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=1,
+            groups=out_channels,
+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)))
    return layer


 def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
    layer = nn.Sequential(
-        ('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
-        ('layer2', DWConvLayer(out_channels, out_channels, stride=stride))
-    )
+        ('layer1', ConvLayer(
+            in_channels, out_channels, kernel_size=kernel_size)),
+        ('layer2', DWConvLayer(
+            out_channels, out_channels, stride=stride)))
    return layer


 class HarDBlock(nn.Layer):
-    def __init__(self, in_channels, growth_rate, grmul, n_layers, 
-                 keepBase=False, residual_out=False, dwconv=False):
+    def __init__(self,
+                 in_channels,
+                 growth_rate,
+                 grmul,
+                 n_layers,
+                 keepBase=False,
+                 residual_out=False,
+                 dwconv=False):
        super().__init__()
        self.keepBase = keepBase
        self.links = []
        layers_ = []
        self.out_channels = 0  # if upsample else in_channels
        for i in range(n_layers):
-            outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
+            outch, inch, link = self.get_link(i + 1, in_channels, growth_rate,
+                                              grmul)
            self.links.append(link)
            if dwconv:
                layers_.append(CombConvLayer(inch, outch))
@ -92,7 +110,7 @@ class HarDBlock(nn.Layer):

        link = []
        for i in range(10):
-            dv = 2 ** i
+            dv = 2**i
            if layer % dv == 0:
                k = layer - dv
                link.append(k)
@ -126,7 +144,7 @@ class HarDBlock(nn.Layer):
        t = len(layers_)
        out_ = []
        for i in range(t):
-            if (i == 0 and self.keepBase) or (i == t-1) or (i % 2 == 1):
+            if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
                out_.append(layers_[i])
        out = paddle.concat(out_, 1)

@ -134,8 +152,11 @@ class HarDBlock(nn.Layer):


 class HarDNet(nn.Layer):
-    def __init__(self, depth_wise=False, arch=85,
-                 class_dim=1000, with_pool=True):
+    def __init__(self,
+                 depth_wise=False,
+                 arch=85,
+                 class_num=1000,
+                 with_pool=True):
        super().__init__()
        first_ch = [32, 64]
        second_kernel = 3
@ -146,16 +167,16 @@ class HarDNet(nn.Layer):
        # HarDNet68
        ch_list = [128, 256, 320, 640, 1024]
        gr = [14, 16, 20, 40, 160]
-        n_layers = [8, 16, 16, 16,  4]
-        downSamp = [1,  0,  1,  1,  0]
+        n_layers = [8, 16, 16, 16, 4]
+        downSamp = [1, 0, 1, 1, 0]

        if arch == 85:
            # HarDNet85
            first_ch = [48, 96]
            ch_list = [192, 256, 320, 480, 720, 1280]
-            gr = [24,  24,  28,  36,  48, 256]
-            n_layers = [8,  16,  16,  16,  16,   4]
-            downSamp = [1,   0,   1,   0,   1,   0]
+            gr = [24, 24, 28, 36, 48, 256]
+            n_layers = [8, 16, 16, 16, 16, 4]
+            downSamp = [1, 0, 1, 0, 1, 0]
            drop_rate = 0.2

        elif arch == 39:
@ -163,9 +184,9 @@ class HarDNet(nn.Layer):
            first_ch = [24, 48]
            ch_list = [96, 320, 640, 1024]
            grmul = 1.6
-            gr = [16,  20, 64, 160]
-            n_layers = [4,  16,  8,   4]
-            downSamp = [1,   1,  1,   0]
+            gr = [16, 20, 64, 160]
+            n_layers = [4, 16, 8, 4]
+            downSamp = [1, 1, 1, 0]

        if depth_wise:
            second_kernel = 1
@ -177,12 +198,17 @@ class HarDNet(nn.Layer):

        # First Layer: Standard Conv3x3, Stride=2
        self.base.append(
-            ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3,
-                      stride=2, bias_attr=False))
+            ConvLayer(
+                in_channels=3,
+                out_channels=first_ch[0],
+                kernel_size=3,
+                stride=2,
+                bias_attr=False))

        # Second Layer
        self.base.append(
-            ConvLayer(first_ch[0], first_ch[1],  kernel_size=second_kernel))
+            ConvLayer(
+                first_ch[0], first_ch[1], kernel_size=second_kernel))

        # Maxpooling or DWConv3x3 downsampling
        if max_pool:
@ -197,7 +223,7 @@ class HarDNet(nn.Layer):
            ch = blk.out_channels
            self.base.append(blk)

-            if i == blks-1 and arch == 85:
+            if i == blks - 1 and arch == 85:
                self.base.append(nn.Dropout(0.1))

            self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
@ -208,17 +234,17 @@ class HarDNet(nn.Layer):
                else:
                    self.base.append(DWConvLayer(ch, ch, stride=2))

-        ch = ch_list[blks-1]
+        ch = ch_list[blks - 1]

        layers = []

        if with_pool:
            layers.append(nn.AdaptiveAvgPool2D((1, 1)))

-        if class_dim > 0:
+        if class_num > 0:
            layers.append(nn.Flatten())
            layers.append(nn.Dropout(drop_rate))
-            layers.append(nn.Linear(ch, class_dim))
+            layers.append(nn.Linear(ch, class_num))

        self.base.append(nn.Sequential(*layers))

--- a/ppcls/arch/backbone/model_zoo/inception_v4.py
+++ b/ppcls/arch/backbone/model_zoo/inception_v4.py
@ -23,7 +23,10 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"InceptionV4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"}
+MODEL_URLS = {
+    "InceptionV4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

@ -392,7 +395,7 @@ class InceptionC(nn.Layer):


 class InceptionV4DY(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
        super(InceptionV4DY, self).__init__()
        self._inception_stem = InceptionStem()

@ -420,7 +423,7 @@ class InceptionV4DY(nn.Layer):
        stdv = 1.0 / math.sqrt(1536 * 1.0)
        self.out = Linear(
            1536,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="final_fc_weights"),
            bias_attr=ParamAttr(name="final_fc_offset"))
@ -466,7 +469,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
            "pretrained type is not available. Please use `string` or `boolean` type."
        )

+
 def InceptionV4(pretrained=False, use_ssld=False, **kwargs):
    model = InceptionV4DY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/levit.py
+++ b/ppcls/arch/backbone/model_zoo/levit.py
@ -27,12 +27,17 @@ from .vision_transformer import trunc_normal_, zeros_, ones_, Identity
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "LeViT_128S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
-              "LeViT_128": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
-              "LeViT_192": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
-              "LeViT_256": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
-              "LeViT_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
-             }
+    "LeViT_128S":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
+    "LeViT_128":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
+    "LeViT_192":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
+    "LeViT_256":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
+    "LeViT_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -326,7 +331,7 @@ class LeViT(nn.Layer):
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                 embed_dim=[192],
                 key_dim=[64],
                 depth=[12],
@ -341,7 +346,7 @@ class LeViT(nn.Layer):
                 drop_path=0):
        super().__init__()

-        self.class_dim = class_dim
+        self.class_num = class_num
        self.num_features = embed_dim[-1]
        self.embed_dim = embed_dim
        self.distillation = distillation
@ -403,10 +408,10 @@ class LeViT(nn.Layer):

        # Classifier head
        self.head = BN_Linear(embed_dim[-1],
-                              class_dim) if class_dim > 0 else Identity()
+                              class_num) if class_num > 0 else Identity()
        if distillation:
            self.head_dist = BN_Linear(
-                embed_dim[-1], class_dim) if class_dim > 0 else Identity()
+                embed_dim[-1], class_num) if class_num > 0 else Identity()

    def forward(self, x):
        x = self.patch_embed(x)
@ -423,7 +428,7 @@ class LeViT(nn.Layer):
        return x


-def model_factory(C, D, X, N, drop_path, class_dim, distillation):
+def model_factory(C, D, X, N, drop_path, class_num, distillation):
    embed_dim = [int(x) for x in C.split('_')]
    num_heads = [int(x) for x in N.split('_')]
    depth = [int(x) for x in X.split('_')]
@ -444,7 +449,7 @@ def model_factory(C, D, X, N, drop_path, class_dim, distillation):
        attention_activation=act,
        mlp_activation=act,
        hybrid_backbone=b16(embed_dim[0], activation=act),
-        class_dim=class_dim,
+        class_num=class_num,
        drop_path=drop_path,
        distillation=distillation)

@ -489,6 +494,7 @@ specification = {
    },
 }

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -502,46 +508,71 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
        )


-def LeViT_128S(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_128S(pretrained=False,
+               use_ssld=False,
+               class_num=1000,
+               distillation=False,
+               **kwargs):
    model = model_factory(
        **specification['LeViT_128S'],
-        class_dim=class_dim,
+        class_num=class_num,
        distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
    return model


-def LeViT_128(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_128(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
    model = model_factory(
        **specification['LeViT_128'],
-        class_dim=class_dim,
+        class_num=class_num,
        distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
    return model


-def LeViT_192(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_192(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
    model = model_factory(
        **specification['LeViT_192'],
-        class_dim=class_dim,
+        class_num=class_num,
        distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
    return model


-def LeViT_256(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_256(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
    model = model_factory(
        **specification['LeViT_256'],
-        class_dim=class_dim,
+        class_num=class_num,
        distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
    return model


-def LeViT_384(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_384(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
    model = model_factory(
        **specification['LeViT_384'],
-        class_dim=class_dim,
+        class_num=class_num,
        distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/mixnet.py
+++ b/ppcls/arch/backbone/model_zoo/mixnet.py
@ -25,9 +25,14 @@ import paddle.nn as nn

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"MixNet_S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams", 
-              "MixNet_M": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams", 
-              "MixNet_L": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"}
+MODEL_URLS = {
+    "MixNet_S":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams",
+    "MixNet_M":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams",
+    "MixNet_L":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

@ -617,7 +622,7 @@ class MixNet(nn.Layer):
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
-    class_dim : int, default 1000
+    class_num : int, default 1000
        Number of classification classes.
    """

@ -632,10 +637,10 @@ class MixNet(nn.Layer):
                 se_factors,
                 in_channels=3,
                 in_size=(224, 224),
-                 class_dim=1000):
+                 class_num=1000):
        super(MixNet, self).__init__()
        self.in_size = in_size
-        self.class_dim = class_dim
+        self.class_num = class_num

        self.features = nn.Sequential()
        self.features.add_sublayer(
@ -687,7 +692,7 @@ class MixNet(nn.Layer):
                kernel_size=7, stride=1))

        self.output = nn.Linear(
-            in_features=in_channels, out_features=class_dim)
+            in_features=in_channels, out_features=class_num)

    def forward(self, x):
        x = self.features(x)
@ -773,9 +778,11 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
            "pretrained type is not available. Please use `string` or `boolean` type."
        )

+
 def MixNet_S(pretrained=False, use_ssld=False, **kwargs):
    model = InceptionV4DY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
    return model


@ -786,7 +793,8 @@ def MixNet_S(**kwargs):
    """
    model = get_mixnet(
        version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
    return model


@ -797,7 +805,8 @@ def MixNet_M(**kwargs):
    """
    model = get_mixnet(
        version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
    return model


@ -808,6 +817,6 @@ def MixNet_L(**kwargs):
    """
    model = get_mixnet(
        version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
    return model
-
--- a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
+++ b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
@ -28,12 +28,20 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"MobileNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams", 
-              "MobileNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams", 
-              "MobileNetV2_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
-              "MobileNetV2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
-              "MobileNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
-              "MobileNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"}
+MODEL_URLS = {
+    "MobileNetV2_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
+    "MobileNetV2_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
+    "MobileNetV2_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
+    "MobileNetV2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
+    "MobileNetV2_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
+    "MobileNetV2_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

@ -155,10 +163,10 @@ class InvresiBlocks(nn.Layer):


 class MobileNet(nn.Layer):
-    def __init__(self, class_dim=1000, scale=1.0, prefix_name=""):
+    def __init__(self, class_num=1000, scale=1.0, prefix_name=""):
        super(MobileNet, self).__init__()
        self.scale = scale
-        self.class_dim = class_dim
+        self.class_num = class_num

        bottleneck_params_list = [
            (1, 16, 1, 1),
@ -209,7 +217,7 @@ class MobileNet(nn.Layer):

        self.out = Linear(
            self.out_c,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(name=prefix_name + "fc10_weights"),
            bias_attr=ParamAttr(name=prefix_name + "fc10_offset"))

@ -239,35 +247,41 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
    model = MobileNet(scale=0.25, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
    return model


 def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
    model = MobileNet(scale=0.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
    return model


 def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs):
    model = MobileNet(scale=0.75, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
    return model


 def MobileNetV2(pretrained=False, use_ssld=False, **kwargs):
    model = MobileNet(scale=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
    return model


 def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
    model = MobileNet(scale=1.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
    return model


 def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
    model = MobileNet(scale=2.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/rednet.py
+++ b/ppcls/arch/backbone/model_zoo/rednet.py
@ -19,7 +19,6 @@ from paddle.vision.models import resnet

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-
 MODEL_URLS = {
    "RedNet26":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams",
@ -33,7 +32,6 @@ MODEL_URLS = {
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams"
 }

-
 __all__ = MODEL_URLS.keys()


@ -51,50 +49,53 @@ class Involution(nn.Layer):
                in_channels=channels,
                out_channels=channels // reduction_ratio,
                kernel_size=1,
-                bias_attr=False
-            )),
+                bias_attr=False)),
            ('bn', nn.BatchNorm2D(channels // reduction_ratio)),
-            ('activate', nn.ReLU())
-        )
-        self.conv2 = nn.Sequential(
-            ('conv', nn.Conv2D(
-                in_channels=channels // reduction_ratio,
-                out_channels=kernel_size**2 * self.groups,
-                kernel_size=1,
-                stride=1
-            ))
-        )
+            ('activate', nn.ReLU()))
+        self.conv2 = nn.Sequential(('conv', nn.Conv2D(
+            in_channels=channels // reduction_ratio,
+            out_channels=kernel_size**2 * self.groups,
+            kernel_size=1,
+            stride=1)))
        if stride > 1:
            self.avgpool = nn.AvgPool2D(stride, stride)

    def forward(self, x):
-        weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
+        weight = self.conv2(
+            self.conv1(x if self.stride == 1 else self.avgpool(x)))
        b, c, h, w = weight.shape
-        weight = weight.reshape((b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
+        weight = weight.reshape(
+            (b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)

-        out = nn.functional.unfold(x, self.kernel_size, self.stride, (self.kernel_size-1)//2, 1)
-        out = out.reshape((b, self.groups, self.group_channels, self.kernel_size**2, h, w))
+        out = nn.functional.unfold(x, self.kernel_size, self.stride,
+                                   (self.kernel_size - 1) // 2, 1)
+        out = out.reshape(
+            (b, self.groups, self.group_channels, self.kernel_size**2, h, w))
        out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
        return out


 class BottleneckBlock(resnet.BottleneckBlock):
-    def __init__(self, inplanes, planes, stride=1, downsample=None, 
-                 groups=1, base_width=64, dilation=1, norm_layer=None):
-        super(BottleneckBlock, self).__init__(
-            inplanes, planes, stride, downsample, 
-            groups, base_width, dilation, norm_layer
-        )
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 groups=1,
+                 base_width=64,
+                 dilation=1,
+                 norm_layer=None):
+        super(BottleneckBlock, self).__init__(inplanes, planes, stride,
+                                              downsample, groups, base_width,
+                                              dilation, norm_layer)
        width = int(planes * (base_width / 64.)) * groups
        self.conv2 = Involution(width, 7, stride)


 class RedNet(resnet.ResNet):
-    def __init__(self, block, depth, class_dim=1000, with_pool=True):
+    def __init__(self, block, depth, class_num=1000, with_pool=True):
        super(RedNet, self).__init__(
-            block=block, depth=50, 
-            num_classes=class_dim, with_pool=with_pool
-        )
+            block=block, depth=50, num_classes=class_num, with_pool=with_pool)
        layer_cfg = {
            26: [1, 2, 4, 1],
            38: [2, 3, 5, 2],
@ -108,7 +109,7 @@ class RedNet(resnet.ResNet):
        self.bn1 = None
        self.relu = None
        self.inplanes = 64
-        self.class_dim = class_dim
+        self.class_num = class_num
        self.stem = nn.Sequential(
            nn.Sequential(
                ('conv', nn.Conv2D(
@ -117,11 +118,9 @@ class RedNet(resnet.ResNet):
                    kernel_size=3,
                    stride=2,
                    padding=1,
-                    bias_attr=False
-                )),
+                    bias_attr=False)),
                ('bn', nn.BatchNorm2D(self.inplanes // 2)),
-                ('activate', nn.ReLU())
-            ),
+                ('activate', nn.ReLU())),
            Involution(self.inplanes // 2, 3, 1),
            nn.BatchNorm2D(self.inplanes // 2),
            nn.ReLU(),
@ -132,12 +131,8 @@ class RedNet(resnet.ResNet):
                    kernel_size=3,
                    stride=1,
                    padding=1,
-                    bias_attr=False
-                )),
-                ('bn', nn.BatchNorm2D(self.inplanes)),
-                ('activate', nn.ReLU())
-            )
-        )
+                    bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes)),
+                ('activate', nn.ReLU())))

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
@ -156,7 +151,7 @@ class RedNet(resnet.ResNet):
        if self.with_pool:
            x = self.avgpool(x)

-        if self.class_dim > 0:
+        if self.class_num > 0:
            x = paddle.flatten(x, 1)
            x = self.fc(x)

--- a/ppcls/arch/backbone/model_zoo/regnet.py
+++ b/ppcls/arch/backbone/model_zoo/regnet.py
@ -28,13 +28,20 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"RegNetX_200MF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams", 
-              "RegNetX_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams", 
-              "RegNetX_32GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams", 
-              "RegNetY_200MF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams", 
-              "RegNetY_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams", 
-              "RegNetY_32GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams", 
-             }
+MODEL_URLS = {
+    "RegNetX_200MF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams",
+    "RegNetX_4GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams",
+    "RegNetX_32GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams",
+    "RegNetY_200MF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams",
+    "RegNetY_4GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams",
+    "RegNetY_32GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -235,7 +242,7 @@ class RegNet(nn.Layer):
                 bot_mul,
                 q=8,
                 se_on=False,
-                 class_dim=1000):
+                 class_num=1000):
        super(RegNet, self).__init__()

        # Generate RegNet ws per block
@ -301,7 +308,7 @@ class RegNet(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
            bias_attr=ParamAttr(name="fc_0.b_0"))
@ -331,8 +338,16 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs):
    model = RegNet(
-        w_a=36.44, w_0=24, w_m=2.49, d=13, group_w=8, bot_mul=1.0, q=8, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
+        w_a=36.44,
+        w_0=24,
+        w_m=2.49,
+        d=13,
+        group_w=8,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
    return model


@ -346,7 +361,8 @@ def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
        bot_mul=1.0,
        q=8,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
    return model


@ -360,7 +376,8 @@ def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
        bot_mul=1.0,
        q=8,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
    return model


@ -375,7 +392,8 @@ def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs):
        q=8,
        se_on=True,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_200MF"], use_ssld=use_ssld)
    return model


@ -390,7 +408,8 @@ def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs):
        q=8,
        se_on=True,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_4GF"], use_ssld=use_ssld)
    return model


@ -405,5 +424,6 @@ def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs):
        q=8,
        se_on=True,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_32GF"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/repvgg.py
+++ b/ppcls/arch/backbone/model_zoo/repvgg.py
@ -4,24 +4,37 @@ import numpy as np

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"RepVGG_A0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams",
-              "RepVGG_A1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams",
-              "RepVGG_A2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams",
-              "RepVGG_B0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams",
-              "RepVGG_B1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams",
-              "RepVGG_B2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams",
-              "RepVGG_B3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams",
-              "RepVGG_B1g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams",
-              "RepVGG_B1g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
-              "RepVGG_B2g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams",
-              "RepVGG_B2g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
-              "RepVGG_B3g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams",
-              "RepVGG_B3g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "RepVGG_A0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams",
+    "RepVGG_A1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams",
+    "RepVGG_A2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams",
+    "RepVGG_B0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams",
+    "RepVGG_B1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams",
+    "RepVGG_B2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams",
+    "RepVGG_B3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams",
+    "RepVGG_B1g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams",
+    "RepVGG_B1g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
+    "RepVGG_B2g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams",
+    "RepVGG_B2g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
+    "RepVGG_B3g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams",
+    "RepVGG_B3g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

-
 optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
 g2_map = {l: 2 for l in optional_groupwise_layers}
 g4_map = {l: 4 for l in optional_groupwise_layers}
@ -174,7 +187,7 @@ class RepVGG(nn.Layer):
                 num_blocks,
                 width_multiplier=None,
                 override_groups_map=None,
-                 class_dim=1000):
+                 class_num=1000):
        super(RepVGG, self).__init__()

        assert len(width_multiplier) == 4
@ -200,7 +213,7 @@ class RepVGG(nn.Layer):
        self.stage4 = self._make_stage(
            int(512 * width_multiplier[3]), num_blocks[3], stride=2)
        self.gap = nn.AdaptiveAvgPool2D(output_size=1)
-        self.linear = nn.Linear(int(512 * width_multiplier[3]), class_dim)
+        self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num)

    def _make_stage(self, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
@ -256,7 +269,8 @@ def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[0.75, 0.75, 0.75, 2.5],
        override_groups_map=None,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld)
    return model


@ -266,7 +280,8 @@ def RepVGG_A1(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[1, 1, 1, 2.5],
        override_groups_map=None,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld)
    return model


@ -276,7 +291,8 @@ def RepVGG_A2(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[1.5, 1.5, 1.5, 2.75],
        override_groups_map=None,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld)
    return model


@ -286,7 +302,8 @@ def RepVGG_B0(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[1, 1, 1, 2.5],
        override_groups_map=None,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld)
    return model


@ -296,7 +313,8 @@ def RepVGG_B1(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[2, 2, 2, 4],
        override_groups_map=None,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld)
    return model


@ -306,7 +324,8 @@ def RepVGG_B1g2(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[2, 2, 2, 4],
        override_groups_map=g2_map,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld)
    return model


@ -316,7 +335,8 @@ def RepVGG_B1g4(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[2, 2, 2, 4],
        override_groups_map=g4_map,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld)
    return model


@ -326,7 +346,8 @@ def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[2.5, 2.5, 2.5, 5],
        override_groups_map=None,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld)
    return model


@ -336,7 +357,8 @@ def RepVGG_B2g2(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[2.5, 2.5, 2.5, 5],
        override_groups_map=g2_map,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld)
    return model


@ -346,7 +368,8 @@ def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[2.5, 2.5, 2.5, 5],
        override_groups_map=g4_map,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld)
    return model


@ -356,7 +379,8 @@ def RepVGG_B3(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[3, 3, 3, 5],
        override_groups_map=None,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld)
    return model


@ -366,7 +390,8 @@ def RepVGG_B3g2(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[3, 3, 3, 5],
        override_groups_map=g2_map,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld)
    return model


@ -376,5 +401,6 @@ def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs):
        width_multiplier=[3, 3, 3, 5],
        override_groups_map=g4_map,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/res2net.py
+++ b/ppcls/arch/backbone/model_zoo/res2net.py
@ -29,9 +29,12 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"Res2Net50_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams",
-              "Res2Net50_14w_8s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "Res2Net50_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams",
+    "Res2Net50_14w_8s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -151,7 +154,7 @@ class BottleneckBlock(nn.Layer):


 class Res2Net(nn.Layer):
-    def __init__(self, layers=50, scales=4, width=26, class_dim=1000):
+    def __init__(self, layers=50, scales=4, width=26, class_num=1000):
        super(Res2Net, self).__init__()

        self.layers = layers
@ -218,7 +221,7 @@ class Res2Net(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))
@ -249,11 +252,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def Res2Net50_26w_4s(pretrained=False, use_ssld=False, **kwargs):
    model = Res2Net(layers=50, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld)
    return model


 def Res2Net50_14w_8s(pretrained=False, use_ssld=False, **kwargs):
    model = Res2Net(layers=50, scales=8, width=14, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/res2net_vd.py
+++ b/ppcls/arch/backbone/model_zoo/res2net_vd.py
@ -29,10 +29,14 @@ import math

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"Res2Net50_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams",
-              "Res2Net101_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams",
-              "Res2Net200_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "Res2Net50_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams",
+    "Res2Net101_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams",
+    "Res2Net200_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -159,7 +163,7 @@ class BottleneckBlock(nn.Layer):


 class Res2Net_vd(nn.Layer):
-    def __init__(self, layers=50, scales=4, width=26, class_dim=1000):
+    def __init__(self, layers=50, scales=4, width=26, class_num=1000):
        super(Res2Net_vd, self).__init__()

        self.layers = layers
@ -240,7 +244,7 @@ class Res2Net_vd(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))
@ -273,17 +277,29 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def Res2Net50_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
    model = Res2Net_vd(layers=50, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_vd_26w_4s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net50_vd_26w_4s"],
+        use_ssld=use_ssld)
    return model


 def Res2Net101_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
    model = Res2Net_vd(layers=101, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net101_vd_26w_4s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net101_vd_26w_4s"],
+        use_ssld=use_ssld)
    return model


 def Res2Net200_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
    model = Res2Net_vd(layers=200, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net200_vd_26w_4s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net200_vd_26w_4s"],
+        use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/resnest.py
+++ b/ppcls/arch/backbone/model_zoo/resnest.py
@ -29,10 +29,14 @@ from paddle.regularizer import L2Decay

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"ResNeSt50_fast_1s1x64d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
-              "ResNeSt50": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
-              "ResNeSt101": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "ResNeSt50_fast_1s1x64d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
+    "ResNeSt50":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
+    "ResNeSt101":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -453,7 +457,7 @@ class ResNeSt(nn.Layer):
                 avd_first=False,
                 final_drop=0.0,
                 last_gamma=False,
-                 class_dim=1000):
+                 class_num=1000):
        super(ResNeSt, self).__init__()

        self.cardinality = groups
@ -643,7 +647,7 @@ class ResNeSt(nn.Layer):

        self.out = Linear(
            self.out_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=nn.initializer.Uniform(-stdv, stdv),
                name="fc_weights"),
@ -690,7 +694,11 @@ def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs):
        avd_first=True,
        final_drop=0.0,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt50_fast_1s1x64d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeSt50_fast_1s1x64d"],
+        use_ssld=use_ssld)
    return model


@ -707,7 +715,8 @@ def ResNeSt50(pretrained=False, use_ssld=False, **kwargs):
        avd_first=False,
        final_drop=0.0,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld)
    return model


@ -724,5 +733,6 @@ def ResNeSt101(pretrained=False, use_ssld=False, **kwargs):
        avd_first=False,
        final_drop=0.0,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/resnet_vc.py
+++ b/ppcls/arch/backbone/model_zoo/resnet_vc.py
@ -30,8 +30,9 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "ResNet50_vc": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams",
-             }
+    "ResNet50_vc":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -177,7 +178,7 @@ class BasicBlock(nn.Layer):


 class ResNet_vc(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000):
+    def __init__(self, layers=50, class_num=1000):
        super(ResNet_vc, self).__init__()

        self.layers = layers
@ -270,7 +271,7 @@ class ResNet_vc(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
            bias_attr=ParamAttr(name="fc_0.b_0"))
@ -303,6 +304,6 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def ResNet50_vc(pretrained=False, use_ssld=False, **kwargs):
    model = ResNet_vc(layers=50, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld)
    return model
-
--- a/ppcls/arch/backbone/model_zoo/resnext.py
+++ b/ppcls/arch/backbone/model_zoo/resnext.py
@ -30,13 +30,19 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "ResNeXt50_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams",
-              "ResNeXt50_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams",
-              "ResNeXt101_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams",
-              "ResNeXt101_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams",
-              "ResNeXt152_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams",
-              "ResNeXt152_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams",
-             }
+    "ResNeXt50_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams",
+    "ResNeXt50_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams",
+    "ResNeXt101_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams",
+    "ResNeXt101_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams",
+    "ResNeXt152_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams",
+    "ResNeXt152_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -143,7 +149,12 @@ class BottleneckBlock(nn.Layer):


 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32, input_image_channel=3, data_format="NCHW"):
+    def __init__(self,
+                 layers=50,
+                 class_num=1000,
+                 cardinality=32,
+                 input_image_channel=3,
+                 data_format="NCHW"):
        super(ResNeXt, self).__init__()

        self.layers = layers
@ -176,7 +187,8 @@ class ResNeXt(nn.Layer):
            act='relu',
            name="res_conv1",
            data_format=self.data_format)
-        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1, data_format=self.data_format)
+        self.pool2d_max = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=self.data_format)

        self.block_list = []
        for block in range(len(depth)):
@ -211,7 +223,7 @@ class ResNeXt(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))
@ -246,35 +258,41 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld)
    return model


 def ResNeXt50_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld)
    return model


 def ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld)
    return model


 def ResNeXt101_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld)
    return model


 def ResNeXt152_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld)
    return model


 def ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
+++ b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
@ -9,17 +9,19 @@ from paddle.nn.initializer import Uniform
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "ResNeXt101_32x8d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams",
-              "ResNeXt101_32x16d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x816_wsl_pretrained.pdparams",
-              "ResNeXt101_32x32d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams",
-              "ResNeXt101_32x48d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams",
-
-             }
+    # Pretrained-weight URL per factory function; the keys also define __all__.
+    "ResNeXt101_32x8d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams",
+    "ResNeXt101_32x16d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x16d_wsl_pretrained.pdparams",
+    "ResNeXt101_32x32d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams",
+    "ResNeXt101_32x48d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())


-
 class ConvBNLayer(nn.Layer):
    def __init__(self,
                 input_channels,
@ -128,10 +130,10 @@ class BottleneckBlock(nn.Layer):


 class ResNeXt101WSL(nn.Layer):
-    def __init__(self, layers=101, cardinality=32, width=48, class_dim=1000):
+    def __init__(self, layers=101, cardinality=32, width=48, class_num=1000):
        super(ResNeXt101WSL, self).__init__()

-        self.class_dim = class_dim
+        self.class_num = class_num

        self.layers = layers
        self.cardinality = cardinality
@ -384,7 +386,7 @@ class ResNeXt101WSL(nn.Layer):
        self._avg_pool = AdaptiveAvgPool2D(1)
        self._out = Linear(
            num_filters[3] // (width // 8),
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(name="fc.weight"),
            bias_attr=ParamAttr(name="fc.bias"))

@ -450,23 +452,39 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def ResNeXt101_32x8d_wsl(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt101WSL(cardinality=32, width=8, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x8d_wsl"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x8d_wsl"],
+        use_ssld=use_ssld)
    return model


 def ResNeXt101_32x16d_wsl(**args):
    model = ResNeXt101WSL(cardinality=32, width=16, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x16d_ws"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x16d_ws"],
+        use_ssld=use_ssld)
    return model


 def ResNeXt101_32x32d_wsl(**args):
    model = ResNeXt101WSL(cardinality=32, width=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x32d_wsl"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x32d_wsl"],
+        use_ssld=use_ssld)
    return model


 def ResNeXt101_32x48d_wsl(**args):
    model = ResNeXt101WSL(cardinality=32, width=48, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x48d_wsl"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x48d_wsl"],
+        use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/resnext_vd.py
+++ b/ppcls/arch/backbone/model_zoo/resnext_vd.py
@ -30,16 +30,23 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams",
-              "ResNeXt50_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams",
-              "ResNeXt101_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams",
-              "ResNeXt101_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams",
-              "ResNeXt152_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams",
-              "ResNeXt152_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams",
-             }
+    "ResNeXt50_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams",
+    "ResNeXt50_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams",
+    "ResNeXt101_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams",
+    "ResNeXt101_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams",
+    "ResNeXt152_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams",
+    "ResNeXt152_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

+
 class ConvBNLayer(nn.Layer):
    def __init__(
            self,
@ -145,7 +152,7 @@ class BottleneckBlock(nn.Layer):


 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32):
+    def __init__(self, layers=50, class_num=1000, cardinality=32):
        super(ResNeXt, self).__init__()

        self.layers = layers
@ -225,7 +232,7 @@ class ResNeXt(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))
@ -242,6 +249,7 @@ class ResNeXt(nn.Layer):
        y = self.out(y)
        return y

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -257,35 +265,53 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
    return model


 def ResNeXt50_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld)
    return model


 def ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_vd_32x4d"],
+        use_ssld=use_ssld)
    return model


 def ResNeXt101_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_vd_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_vd_64x4d"],
+        use_ssld=use_ssld)
    return model


 def ResNeXt152_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt152_vd_32x4d"],
+        use_ssld=use_ssld)
    return model


 def ResNeXt152_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_vd_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt152_vd_64x4d"],
+        use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/rexnet.py
+++ b/ppcls/arch/backbone/model_zoo/rexnet.py
@ -25,12 +25,17 @@ from math import ceil
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "ReXNet_1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams",
-              "ReXNet_1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams",
-              "ReXNet_1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_32x4d_pretrained.pdparams",
-              "ReXNet_2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams",
-              "ReXNet_3_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams",
-             }
+    # Pretrained-weight URL per factory function; the keys also define __all__.
+    "ReXNet_1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams",
+    "ReXNet_1_3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams",
+    # NOTE(review): the "_32x4d" suffix was dropped as a suspected copy-paste
+    # from a ResNeXt entry; confirm the hosted file name.
+    "ReXNet_1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_pretrained.pdparams",
+    "ReXNet_2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams",
+    "ReXNet_3_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -154,7 +159,7 @@ class ReXNetV1(nn.Layer):
                 final_ch=180,
                 width_mult=1.0,
                 depth_mult=1.0,
-                 class_dim=1000,
+                 class_num=1000,
                 use_se=True,
                 se_ratio=12,
                 dropout_ratio=0.2,
@ -220,7 +225,7 @@ class ReXNetV1(nn.Layer):
        self.output = nn.Sequential(
            nn.Dropout(dropout_ratio),
            nn.Conv2D(
-                pen_channels, class_dim, 1, bias_attr=True))
+                pen_channels, class_num, 1, bias_attr=True))

    def forward(self, x):
        x = self.features(x)
@ -243,29 +248,34 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def ReXNet_1_0(pretrained=False, use_ssld=False, **kwargs):
    model = ReXNetV1(width_mult=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld)
    return model


 def ReXNet_1_3(pretrained=False, use_ssld=False, **kwargs):
    model = ReXNetV1(width_mult=1.3, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld)
    return model


 def ReXNet_1_5(pretrained=False, use_ssld=False, **kwargs):
    model = ReXNetV1(width_mult=1.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld)
    return model


 def ReXNet_2_0(pretrained=False, use_ssld=False, **kwargs):
    model = ReXNetV1(width_mult=2.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld)
    return model


 def ReXNet_3_0(pretrained=False, use_ssld=False, **kwargs):
    model = ReXNetV1(width_mult=3.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
@ -29,11 +29,13 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "SE_ResNet18_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams",
-              "SE_ResNet34_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams",
-              "SE_ResNet50_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams",
-
-             }
+    "SE_ResNet18_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams",
+    "SE_ResNet34_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams",
+    "SE_ResNet50_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -240,7 +242,7 @@ class SELayer(nn.Layer):


 class SE_ResNet_vd(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000):
+    def __init__(self, layers=50, class_num=1000):
        super(SE_ResNet_vd, self).__init__()

        self.layers = layers
@ -336,7 +338,7 @@ class SE_ResNet_vd(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc6_weights"),
            bias_attr=ParamAttr(name="fc6_offset"))
@ -369,17 +371,20 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def SE_ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
    model = SE_ResNet_vd(layers=18, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld)
    return model


 def SE_ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
    model = SE_ResNet_vd(layers=34, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld)
    return model


 def SE_ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
    model = SE_ResNet_vd(layers=50, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/se_resnext.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnext.py
@ -30,11 +30,13 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "SE_ResNeXt50_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams",
-              "SE_ResNeXt101_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams",
-              "SE_ResNeXt152_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams",
-
-             }
+    "SE_ResNeXt50_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams",
+    "SE_ResNeXt101_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams",
+    "SE_ResNeXt152_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -148,7 +150,12 @@ class BottleneckBlock(nn.Layer):


 class SELayer(nn.Layer):
-    def __init__(self, num_channels, num_filters, reduction_ratio, name=None, data_format="NCHW"):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 reduction_ratio,
+                 name=None,
+                 data_format="NCHW"):
        super(SELayer, self).__init__()

        self.data_format = data_format
@ -193,7 +200,12 @@ class SELayer(nn.Layer):


 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32, input_image_channel=3, data_format="NCHW"):
+    def __init__(self,
+                 layers=50,
+                 class_num=1000,
+                 cardinality=32,
+                 input_image_channel=3,
+                 data_format="NCHW"):
        super(ResNeXt, self).__init__()

        self.layers = layers
@ -254,7 +266,8 @@ class ResNeXt(nn.Layer):
                name="conv3",
                data_format=self.data_format)

-        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1, data_format=self.data_format)
+        self.pool2d_max = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=self.data_format)

        self.block_list = []
        n = 1 if layers == 50 or layers == 101 else 3
@ -286,13 +299,13 @@ class ResNeXt(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc6_weights"),
            bias_attr=ParamAttr(name="fc6_offset"))

    def forward(self, inputs):
-         with paddle.static.amp.fp16_guard():
+        with paddle.static.amp.fp16_guard():
            if self.data_format == "NHWC":
                inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
                inputs.stop_gradient = True
@ -326,17 +339,26 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def SE_ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld)
    return model


 def SE_ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt101_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt101_32x4d"],
+        use_ssld=use_ssld)
    return model


 def SE_ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt152_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt152_64x4d"],
+        use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
@ -30,11 +30,13 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "SE_ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
-              "SE_ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
-              "SENet154_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams",
-
-             }
+    # Pretrained-weight URL per factory function; the keys also define __all__,
+    # so every factory in this module needs its own entry here.
+    "SE_ResNeXt50_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
+    "SE_ResNeXt101_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_vd_32x4d_pretrained.pdparams",
+    "SENet154_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -184,7 +186,7 @@ class SELayer(nn.Layer):


 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32):
+    def __init__(self, layers=50, class_num=1000, cardinality=32):
        super(ResNeXt, self).__init__()

        self.layers = layers
@ -261,7 +263,7 @@ class ResNeXt(nn.Layer):

        self.out = Linear(
            self.pool2d_avg_channels,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name="fc6_weights"),
            bias_attr=ParamAttr(name="fc6_offset"))
@ -294,17 +296,26 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def SE_ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt50_vd_32x4d"],
+        use_ssld=use_ssld)
    return model


 def SE_ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt101_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt101_vd_32x4d"],
+        use_ssld=use_ssld)
    return model


 def SENet154_vd(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
+++ b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
@ -25,14 +25,21 @@ from paddle.nn.functional import swish
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "ShuffleNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams",
-              "ShuffleNetV2_x0_33": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams",
-              "ShuffleNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams",
-              "ShuffleNetV2_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams",
-              "ShuffleNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams",
-              "ShuffleNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams",
-              "ShuffleNetV2_swish": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams"
-             }
+    "ShuffleNetV2_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams",
+    "ShuffleNetV2_x0_33":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams",
+    "ShuffleNetV2_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams",
+    "ShuffleNetV2_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams",
+    "ShuffleNetV2_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams",
+    "ShuffleNetV2_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams",
+    "ShuffleNetV2_swish":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

@ -207,10 +214,10 @@ class InvertedResidualDS(Layer):


 class ShuffleNet(Layer):
-    def __init__(self, class_dim=1000, scale=1.0, act="relu"):
+    def __init__(self, class_num=1000, scale=1.0, act="relu"):
        super(ShuffleNet, self).__init__()
        self.scale = scale
-        self.class_dim = class_dim
+        self.class_num = class_num
        stage_repeats = [4, 8, 4]

        if scale == 0.25:
@ -277,7 +284,7 @@ class ShuffleNet(Layer):
        # 5. fc
        self._fc = Linear(
            stage_out_channels[-1],
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(name='fc6_weights'),
            bias_attr=ParamAttr(name='fc6_offset'))

@ -308,41 +315,48 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def ShuffleNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
    model = ShuffleNet(scale=0.25, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld)
    return model


 def ShuffleNetV2_x0_33(pretrained=False, use_ssld=False, **kwargs):
    model = ShuffleNet(scale=0.33, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld)
    return model


 def ShuffleNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
    model = ShuffleNet(scale=0.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld)
    return model


 def ShuffleNetV2_x1_0(pretrained=False, use_ssld=False, **kwargs):
    model = ShuffleNet(scale=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld)
    return model


 def ShuffleNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
    model = ShuffleNet(scale=1.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld)
    return model


 def ShuffleNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
    model = ShuffleNet(scale=2.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld)
    return model


 def ShuffleNetV2_swish(pretrained=False, use_ssld=False, **kwargs):
    model = ShuffleNet(scale=1.0, act="swish", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/squeezenet.py
+++ b/ppcls/arch/backbone/model_zoo/squeezenet.py
@ -22,9 +22,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "SqueezeNet1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams",
-              "SqueezeNet1_1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams",
-             }
+    "SqueezeNet1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams",
+    "SqueezeNet1_1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -78,7 +80,7 @@ class MakeFire(nn.Layer):


 class SqueezeNet(nn.Layer):
-    def __init__(self, version, class_dim=1000):
+    def __init__(self, version, class_num=1000):
        super(SqueezeNet, self).__init__()
        self.version = version

@ -125,7 +127,7 @@ class SqueezeNet(nn.Layer):
        self._drop = Dropout(p=0.5, mode="downscale_in_infer")
        self._conv9 = Conv2D(
            512,
-            class_dim,
+            class_num,
            1,
            weight_attr=ParamAttr(name="conv10_weights"),
            bias_attr=ParamAttr(name="conv10_offset"))
@ -164,6 +166,7 @@ class SqueezeNet(nn.Layer):
        x = paddle.squeeze(x, axis=[2, 3])
        return x

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -179,11 +182,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def SqueezeNet1_0(pretrained=False, use_ssld=False, **kwargs):
    model = SqueezeNet(version="1.0", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld)
    return model


 def SqueezeNet1_1(pretrained=False, use_ssld=False, **kwargs):
    model = SqueezeNet(version="1.1", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/swin_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/swin_transformer.py
@ -24,13 +24,19 @@ from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPat
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "SwinTransformer_tiny_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_small_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_base_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_base_patch4_window12_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams",
-              "SwinTransformer_large_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_large_patch4_window12_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_pretrained.pdparams",
-             }
+    "SwinTransformer_tiny_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_small_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_base_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_base_patch4_window12_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams",
+    "SwinTransformer_large_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_large_patch4_window12_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_pretrained.pdparams",
+}

 __all__ = list(MODEL_URLS.keys())

@ -611,7 +617,7 @@ class SwinTransformer(nn.Layer):
                 img_size=224,
                 patch_size=4,
                 in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                 embed_dim=96,
                 depths=[2, 2, 6, 2],
                 num_heads=[3, 6, 12, 24],
@ -629,7 +635,7 @@ class SwinTransformer(nn.Layer):
                 **kwargs):
        super(SwinTransformer, self).__init__()

-        self.num_classes = num_classes = class_dim
+        self.num_classes = num_classes = class_num
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.ape = ape
@ -743,7 +749,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
        )


-def SwinTransformer_tiny_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_tiny_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
    model = SwinTransformer(
        embed_dim=96,
        depths=[2, 2, 6, 2],
@ -751,22 +759,34 @@ def SwinTransformer_tiny_patch4_window7_224(pretrained=False, use_ssld=False, **
        window_size=7,
        drop_path_rate=0.2,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"],
+        use_ssld=use_ssld)
    return model


-def SwinTransformer_small_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_small_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
    model = SwinTransformer(
        embed_dim=96,
        depths=[2, 2, 18, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_small_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_small_patch4_window7_224"],
+        use_ssld=use_ssld)
    return model


-def SwinTransformer_base_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_base_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
    model = SwinTransformer(
        embed_dim=128,
        depths=[2, 2, 18, 2],
@ -774,11 +794,17 @@ def SwinTransformer_base_patch4_window7_224(pretrained=False, use_ssld=False, **
        window_size=7,
        drop_path_rate=0.5,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_base_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window7_224"],
+        use_ssld=use_ssld)
    return model


-def SwinTransformer_base_patch4_window12_384(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_base_patch4_window12_384(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
    model = SwinTransformer(
        img_size=384,
        embed_dim=128,
@ -787,22 +813,34 @@ def SwinTransformer_base_patch4_window12_384(pretrained=False, use_ssld=False, *
        window_size=12,
        drop_path_rate=0.5,  # NOTE: do not appear in offical code
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_base_patch4_window12_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window12_384"],
+        use_ssld=use_ssld)
    return model


-def SwinTransformer_large_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_large_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
    model = SwinTransformer(
        embed_dim=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        window_size=7,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_large_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window7_224"],
+        use_ssld=use_ssld)
    return model


-def SwinTransformer_large_patch4_window12_384(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_large_patch4_window12_384(pretrained=False,
+                                              use_ssld=False,
+                                              **kwargs):
    model = SwinTransformer(
        img_size=384,
        embed_dim=192,
@ -810,5 +848,9 @@ def SwinTransformer_large_patch4_window12_384(pretrained=False, use_ssld=False,
        num_heads=[6, 12, 24, 48],
        window_size=12,
        **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_large_patch4_window12_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window12_384"],
+        use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/tnt.py
+++ b/ppcls/arch/backbone/model_zoo/tnt.py
@ -23,16 +23,13 @@ from paddle.nn.initializer import TruncatedNormal, Constant
 from ppcls.arch.backbone.base.theseus_layer import Identity
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-
 MODEL_URLS = {
    "TNT_small":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams"
 }

-
 __all__ = MODEL_URLS.keys()

-
 trunc_normal_ = TruncatedNormal(std=.02)
 zeros_ = Constant(value=0.)
 ones_ = Constant(value=1.)
@ -66,8 +63,12 @@ class DropPath(nn.Layer):


 class Mlp(nn.Layer):
-    def __init__(self, in_features, hidden_features=None,
-                 out_features=None, act_layer=nn.GELU, drop=0.):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.GELU,
+                 drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
@ -86,14 +87,19 @@ class Mlp(nn.Layer):


 class Attention(nn.Layer):
-    def __init__(self, dim, hidden_dim, num_heads=8, 
-                 qkv_bias=False, attn_drop=0., proj_drop=0.):
+    def __init__(self,
+                 dim,
+                 hidden_dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 attn_drop=0.,
+                 proj_drop=0.):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        head_dim = hidden_dim // num_heads
        self.head_dim = head_dim
-        self.scale = head_dim ** -0.5
+        self.scale = head_dim**-0.5

        self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
        self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
@ -103,73 +109,103 @@ class Attention(nn.Layer):

    def forward(self, x):
        B, N, C = x.shape
-        qk = self.qk(x).reshape((B, N, 2, self.num_heads, self.head_dim)).transpose((2, 0, 3, 1, 4))
+        qk = self.qk(x).reshape(
+            (B, N, 2, self.num_heads, self.head_dim)).transpose(
+                (2, 0, 3, 1, 4))

        q, k = qk[0], qk[1]
-        v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose((0, 2, 1, 3))
+        v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose(
+            (0, 2, 1, 3))

-        attn = (q @ k.transpose((0, 1, 3, 2))) * self.scale
+        attn = (q @k.transpose((0, 1, 3, 2))) * self.scale
        attn = nn.functional.softmax(attn, axis=-1)
        attn = self.attn_drop(attn)

-        x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
+        x = (attn @v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


 class Block(nn.Layer):
-    def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4, mlp_ratio=4.,
-                 qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+    def __init__(self,
+                 dim,
+                 in_dim,
+                 num_pixel,
+                 num_heads=12,
+                 in_num_head=4,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        # Inner transformer
        self.norm_in = norm_layer(in_dim)
        self.attn_in = Attention(
-            in_dim, in_dim, num_heads=in_num_head, 
-            qkv_bias=qkv_bias, attn_drop=attn_drop, 
-            proj_drop=drop
-        )
+            in_dim,
+            in_dim,
+            num_heads=in_num_head,
+            qkv_bias=qkv_bias,
+            attn_drop=attn_drop,
+            proj_drop=drop)

        self.norm_mlp_in = norm_layer(in_dim)
-        self.mlp_in = Mlp(
-            in_features=in_dim, hidden_features=int(in_dim * 4),
-            out_features=in_dim, act_layer=act_layer, drop=drop
-        )
+        self.mlp_in = Mlp(in_features=in_dim,
+                          hidden_features=int(in_dim * 4),
+                          out_features=in_dim,
+                          act_layer=act_layer,
+                          drop=drop)

        self.norm1_proj = norm_layer(in_dim)
        self.proj = nn.Linear(in_dim * num_pixel, dim)
        # Outer transformer
        self.norm_out = norm_layer(dim)
        self.attn_out = Attention(
-            dim, dim, num_heads=num_heads, qkv_bias=qkv_bias,
-            attn_drop=attn_drop, proj_drop=drop
-        )
+            dim,
+            dim,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            attn_drop=attn_drop,
+            proj_drop=drop)

        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()

        self.norm_mlp = norm_layer(dim)
-        self.mlp = Mlp(
-            in_features=dim, hidden_features=int(dim * mlp_ratio),
-            out_features=dim, act_layer=act_layer, drop=drop
-        )
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=int(dim * mlp_ratio),
+                       out_features=dim,
+                       act_layer=act_layer,
+                       drop=drop)

    def forward(self, pixel_embed, patch_embed):
        # inner
-        pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))
-        pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))
+        pixel_embed = pixel_embed + self.drop_path(
+            self.attn_in(self.norm_in(pixel_embed)))
+        pixel_embed = pixel_embed + self.drop_path(
+            self.mlp_in(self.norm_mlp_in(pixel_embed)))
        # outer
        B, N, C = patch_embed.shape
-        patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
-        patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed)))
-        patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed)))
+        patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(
+            self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
+        patch_embed = patch_embed + self.drop_path(
+            self.attn_out(self.norm_out(patch_embed)))
+        patch_embed = patch_embed + self.drop_path(
+            self.mlp(self.norm_mlp(patch_embed)))
        return pixel_embed, patch_embed


 class PixelEmbed(nn.Layer):
-    def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 in_dim=48,
+                 stride=4):
        super().__init__()
-        num_patches = (img_size // patch_size) ** 2
+        num_patches = (img_size // patch_size)**2
        self.img_size = img_size
        self.num_patches = num_patches
        self.in_dim = in_dim
@ -177,10 +213,7 @@ class PixelEmbed(nn.Layer):
        self.new_patch_size = new_patch_size

        self.proj = nn.Conv2D(
-            in_chans, self.in_dim,
-            kernel_size=7, padding=3, 
-            stride=stride
-        )
+            in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride)

    def forward(self, x, pixel_pos):
        B, C, H, W = x.shape
@ -188,50 +221,64 @@ class PixelEmbed(nn.Layer):

        x = self.proj(x)
        x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
-        x = x.transpose((0, 2, 1)).reshape((B * self.num_patches, self.in_dim, self.new_patch_size, self.new_patch_size))
+        x = x.transpose((0, 2, 1)).reshape(
+            (B * self.num_patches, self.in_dim, self.new_patch_size,
+             self.new_patch_size))
        x = x + pixel_pos
-        x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose((0, 2, 1))
+        x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose(
+            (0, 2, 1))
        return x


 class TNT(nn.Layer):
-    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, in_dim=48, depth=12,
-                 num_heads=12, in_num_head=4, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0.,
-                 drop_path_rate=0., norm_layer=nn.LayerNorm, first_stride=4, class_dim=1000):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 embed_dim=768,
+                 in_dim=48,
+                 depth=12,
+                 num_heads=12,
+                 in_num_head=4,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 first_stride=4,
+                 class_num=1000):
        super().__init__()
-        self.class_dim = class_dim
+        self.class_num = class_num
        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        self.pixel_embed = PixelEmbed(
-            img_size=img_size, patch_size=patch_size, 
-            in_chans=in_chans, in_dim=in_dim, stride=first_stride
-        )
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=in_chans,
+            in_dim=in_dim,
+            stride=first_stride)
        num_patches = self.pixel_embed.num_patches
        self.num_patches = num_patches
        new_patch_size = self.pixel_embed.new_patch_size
-        num_pixel = new_patch_size ** 2
+        num_pixel = new_patch_size**2

        self.norm1_proj = norm_layer(num_pixel * in_dim)
        self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
        self.norm2_proj = norm_layer(embed_dim)

        self.cls_token = self.create_parameter(
-            shape=(1, 1, embed_dim), 
-            default_initializer=zeros_
-        )
+            shape=(1, 1, embed_dim), default_initializer=zeros_)
        self.add_parameter("cls_token", self.cls_token)

        self.patch_pos = self.create_parameter(
-            shape=(1, num_patches + 1, embed_dim), 
-            default_initializer=zeros_
-        )
+            shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_)
        self.add_parameter("patch_pos", self.patch_pos)

        self.pixel_pos = self.create_parameter(
            shape=(1, in_dim, new_patch_size, new_patch_size),
-            default_initializer=zeros_
-        )
+            default_initializer=zeros_)
        self.add_parameter("pixel_pos", self.pixel_pos)

        self.pos_drop = nn.Dropout(p=drop_rate)
@ -241,17 +288,24 @@ class TNT(nn.Layer):

        blocks = []
        for i in range(depth):
-            blocks.append(Block(
-                dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel, num_heads=num_heads, 
-                in_num_head=in_num_head, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, 
-                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], 
-                norm_layer=norm_layer
-            ))
+            blocks.append(
+                Block(
+                    dim=embed_dim,
+                    in_dim=in_dim,
+                    num_pixel=num_pixel,
+                    num_heads=num_heads,
+                    in_num_head=in_num_head,
+                    mlp_ratio=mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    drop=drop_rate,
+                    attn_drop=attn_drop_rate,
+                    drop_path=dpr[i],
+                    norm_layer=norm_layer))
        self.blocks = nn.LayerList(blocks)
        self.norm = norm_layer(embed_dim)

-        if class_dim > 0:
-            self.head = nn.Linear(embed_dim, class_dim)
+        if class_num > 0:
+            self.head = nn.Linear(embed_dim, class_num)

        trunc_normal_(self.cls_token)
        trunc_normal_(self.patch_pos)
@ -271,8 +325,12 @@ class TNT(nn.Layer):
        B = x.shape[0]
        pixel_embed = self.pixel_embed(x, self.pixel_pos)

-        patch_embed = self.norm2_proj(self.proj(self.norm1_proj(pixel_embed.reshape((B, self.num_patches, -1)))))
-        patch_embed = paddle.concat((self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
+        patch_embed = self.norm2_proj(
+            self.proj(
+                self.norm1_proj(
+                    pixel_embed.reshape((B, self.num_patches, -1)))))
+        patch_embed = paddle.concat(
+            (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
        patch_embed = patch_embed + self.patch_pos
        patch_embed = self.pos_drop(patch_embed)

@ -285,7 +343,7 @@ class TNT(nn.Layer):
    def forward(self, x):
        x = self.forward_features(x)

-        if self.class_dim > 0:
+        if self.class_num > 0:
            x = self.head(x)
        return x

@ -304,15 +362,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):


 def TNT_small(pretrained=False, **kwargs):
-    model = TNT(
-        patch_size=16,
-        embed_dim=384,
-        in_dim=24,
-        depth=12,
-        num_heads=6,
-        in_num_head=4,
-        qkv_bias=False,
-        **kwargs
-    )
+    model = TNT(patch_size=16,
+                embed_dim=384,
+                in_dim=24,
+                depth=12,
+                num_heads=6,
+                in_num_head=4,
+                qkv_bias=False,
+                **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"])
    return model
--- a/ppcls/arch/backbone/model_zoo/vision_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/vision_transformer.py
@ -231,7 +231,7 @@ class VisionTransformer(nn.Layer):
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
@ -245,7 +245,7 @@ class VisionTransformer(nn.Layer):
                 epsilon=1e-5,
                 **args):
        super().__init__()
-        self.class_dim = class_dim
+        self.class_num = class_num

        self.num_features = self.embed_dim = embed_dim

@ -284,7 +284,7 @@ class VisionTransformer(nn.Layer):

        # Classifier head
        self.head = nn.Linear(embed_dim,
-                              class_dim) if class_dim > 0 else Identity()
+                              class_num) if class_num > 0 else Identity()

        trunc_normal_(self.pos_embed)
        trunc_normal_(self.cls_token)
--- a/ppcls/arch/backbone/model_zoo/xception.py
+++ b/ppcls/arch/backbone/model_zoo/xception.py
@ -8,14 +8,16 @@ from paddle.nn.initializer import Uniform
 import math
 import sys

-
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {
-              "Xception41": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams",
-              "Xception65": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams",
-              "Xception71": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams"
-             }
+    "Xception41":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams",
+    "Xception65":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams",
+    "Xception71":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

@ -290,7 +292,7 @@ class ExitFlowBottleneckBlock(nn.Layer):


 class ExitFlow(nn.Layer):
-    def __init__(self, class_dim):
+    def __init__(self, class_num):
        super(ExitFlow, self).__init__()

        name = "exit_flow"
@ -303,7 +305,7 @@ class ExitFlow(nn.Layer):
        stdv = 1.0 / math.sqrt(2048 * 1.0)
        self._out = Linear(
            2048,
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(
                name="fc_weights", initializer=Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(name="fc_offset"))
@ -324,13 +326,13 @@ class Xception(nn.Layer):
    def __init__(self,
                 entry_flow_block_num=3,
                 middle_flow_block_num=8,
-                 class_dim=1000):
+                 class_num=1000):
        super(Xception, self).__init__()
        self.entry_flow_block_num = entry_flow_block_num
        self.middle_flow_block_num = middle_flow_block_num
        self._entry_flow = EntryFlow(entry_flow_block_num)
        self._middle_flow = MiddleFlow(middle_flow_block_num)
-        self._exit_flow = ExitFlow(class_dim)
+        self._exit_flow = ExitFlow(class_num)

    def forward(self, inputs):
        x = self._entry_flow(inputs)
@ -338,6 +340,7 @@ class Xception(nn.Layer):
        x = self._exit_flow(x)
        return x

+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
@ -353,17 +356,22 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def Xception41(pretrained=False, use_ssld=False, **kwargs):
    model = Xception(entry_flow_block_num=3, middle_flow_block_num=8, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld)
    return model


 def Xception65(pretrained=False, use_ssld=False, **kwargs):
-    model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld)
+    model = Xception(
+        entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld)
    return model


 def Xception71(pretrained=False, use_ssld=False, **kwargs):
-    model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld)
+    model = Xception(
+        entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld)
    return model
--- a/ppcls/arch/backbone/model_zoo/xception_deeplab.py
+++ b/ppcls/arch/backbone/model_zoo/xception_deeplab.py
@ -21,8 +21,12 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D

 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

-MODEL_URLS = {"Xception41_deeplab": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams",
-             "Xception65_deeplab": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams"}
+MODEL_URLS = {
+    "Xception41_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams",
+    "Xception65_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams"
+}

 __all__ = list(MODEL_URLS.keys())

@ -268,7 +272,7 @@ class Xception_Block(nn.Layer):


 class XceptionDeeplab(nn.Layer):
-    def __init__(self, backbone, class_dim=1000):
+    def __init__(self, backbone, class_num=1000):
        super(XceptionDeeplab, self).__init__()

        bottleneck_params = gen_bottleneck_params(backbone)
@ -370,7 +374,7 @@ class XceptionDeeplab(nn.Layer):
        self._pool = AdaptiveAvgPool2D(1)
        self._fc = Linear(
            self.chns[1][-1],
-            class_dim,
+            class_num,
            weight_attr=ParamAttr(name="fc_weights"),
            bias_attr=ParamAttr(name="fc_bias"))

@ -405,11 +409,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):

 def Xception41_deeplab(pretrained=False, use_ssld=False, **kwargs):
    model = XceptionDeeplab('xception_41', **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld)
    return model


 def Xception65_deeplab(pretrained=False, use_ssld=False, **kwargs):
    model = XceptionDeeplab("xception_65", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld)
    return model