diff --git a/ppcls/arch/backbone/model_zoo/alexnet.py b/ppcls/arch/backbone/model_zoo/alexnet.py
index 3e1d1aa52..b44901a63 100644
--- a/ppcls/arch/backbone/model_zoo/alexnet.py
+++ b/ppcls/arch/backbone/model_zoo/alexnet.py
@@ -23,10 +23,14 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"AlexNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"}
+MODEL_URLS = {
+    "AlexNet":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
+
 class ConvPoolLayer(nn.Layer):
     def __init__(self,
                  input_channels,
@@ -64,7 +68,7 @@ class ConvPoolLayer(nn.Layer):
 
 
 class AlexNetDY(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
         super(AlexNetDY, self).__init__()
 
         stdv = 1.0 / math.sqrt(3 * 11 * 11)
@@ -119,7 +123,7 @@ class AlexNetDY(nn.Layer):
                 name="fc7_offset", initializer=Uniform(-stdv, stdv)))
         self._fc8 = Linear(
             in_features=4096,
-            out_features=class_dim,
+            out_features=class_num,
             weight_attr=ParamAttr(
                 name="fc8_weights", initializer=Uniform(-stdv, stdv)),
             bias_attr=ParamAttr(
@@ -143,6 +147,7 @@ class AlexNetDY(nn.Layer):
         x = self._fc8(x)
         return x
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -155,7 +160,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
 
+
 def AlexNet(pretrained=False, use_ssld=False, **kwargs):
     model = AlexNetDY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/darknet.py b/ppcls/arch/backbone/model_zoo/darknet.py
index 16b4b8600..75aafd85b 100644
--- a/ppcls/arch/backbone/model_zoo/darknet.py
+++ b/ppcls/arch/backbone/model_zoo/darknet.py
@@ -23,10 +23,14 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"DarkNet53": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"}
+MODEL_URLS = {
+    "DarkNet53":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
+
 class ConvBNLayer(nn.Layer):
     def __init__(self,
                  input_channels,
@@ -77,7 +81,7 @@ class BasicBlock(nn.Layer):
 
 
 class DarkNet(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
         super(DarkNet, self).__init__()
 
         self.stages = [1, 2, 8, 8, 4]
@@ -126,7 +130,7 @@ class DarkNet(nn.Layer):
         stdv = 1.0 / math.sqrt(1024.0)
         self._out = Linear(
             1024,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 name="fc_weights", initializer=Uniform(-stdv, stdv)),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -172,6 +176,7 @@ class DarkNet(nn.Layer):
         x = self._out(x)
         return x
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -183,8 +188,10 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
+
 def DarkNet53(pretrained=False, use_ssld=False, **kwargs):
     model = DarkNet(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/densenet.py b/ppcls/arch/backbone/model_zoo/densenet.py
index 190959b80..7e6e20251 100644
--- a/ppcls/arch/backbone/model_zoo/densenet.py
+++ b/ppcls/arch/backbone/model_zoo/densenet.py
@@ -28,12 +28,18 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"DenseNet121": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
-              "DenseNet161": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
-              "DenseNet169": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
-              "DenseNet201": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
-              "DenseNet264": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "DenseNet121":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
+    "DenseNet161":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
+    "DenseNet169":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
+    "DenseNet201":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
+    "DenseNet264":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -196,7 +202,7 @@ class ConvBNLayer(nn.Layer):
 
 
 class DenseNet(nn.Layer):
-    def __init__(self, layers=60, bn_size=4, dropout=0, class_dim=1000):
+    def __init__(self, layers=60, bn_size=4, dropout=0, class_num=1000):
         super(DenseNet, self).__init__()
 
         supported_layers = [121, 161, 169, 201, 264]
@@ -269,7 +275,7 @@ class DenseNet(nn.Layer):
 
         self.out = Linear(
             num_features,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_weights"),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -289,6 +295,7 @@ class DenseNet(nn.Layer):
         y = self.out(y)
         return y
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -301,31 +308,37 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
 
+
 def DenseNet121(pretrained=False, use_ssld=False, **kwargs):
     model = DenseNet(layers=121, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
     return model
 
 
 def DenseNet161(pretrained=False, use_ssld=False, **kwargs):
     model = DenseNet(layers=161, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
     return model
 
 
 def DenseNet169(pretrained=False, use_ssld=False, **kwargs):
     model = DenseNet(layers=169, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
     return model
 
 
 def DenseNet201(pretrained=False, use_ssld=False, **kwargs):
     model = DenseNet(layers=201, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
     return model
 
 
 def DenseNet264(pretrained=False, use_ssld=False, **kwargs):
     model = DenseNet(layers=264, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
index b7c36192c..025d36123 100644
--- a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
@@ -19,15 +19,23 @@ from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zero
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "DeiT_tiny_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
-              "DeiT_small_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
-              "DeiT_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
-              "DeiT_tiny_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
-              "DeiT_small_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
-              "DeiT_base_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams", 
-              "DeiT_base_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
-              "DeiT_base_distilled_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
-             }
+    "DeiT_tiny_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
+    "DeiT_small_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
+    "DeiT_base_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
+    "DeiT_tiny_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_small_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_base_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_base_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
+    "DeiT_base_distilled_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -36,7 +44,7 @@ class DistilledVisionTransformer(VisionTransformer):
     def __init__(self,
                  img_size=224,
                  patch_size=16,
-                 class_dim=1000,
+                 class_num=1000,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
@@ -48,7 +56,7 @@ class DistilledVisionTransformer(VisionTransformer):
         super().__init__(
             img_size=img_size,
             patch_size=patch_size,
-            class_dim=class_dim,
+            class_num=class_num,
             embed_dim=embed_dim,
             depth=depth,
             num_heads=num_heads,
@@ -68,7 +76,7 @@ class DistilledVisionTransformer(VisionTransformer):
 
         self.head_dist = nn.Linear(
             self.embed_dim,
-            self.class_dim) if self.class_dim > 0 else Identity()
+            self.class_num) if self.class_num > 0 else Identity()
 
         trunc_normal_(self.dist_token)
         trunc_normal_(self.pos_embed)
@@ -109,7 +117,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     model = VisionTransformer(
@@ -121,7 +129,11 @@ def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_tiny_patch16_224"],
+        use_ssld=use_ssld)
     return model
 
 
@@ -135,7 +147,11 @@ def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_small_patch16_224"],
+        use_ssld=use_ssld)
     return model
 
 
@@ -149,11 +165,16 @@ def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_patch16_224"],
+        use_ssld=use_ssld)
     return model
 
 
-def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False,
+                                    **kwargs):
     model = DistilledVisionTransformer(
         patch_size=16,
         embed_dim=192,
@@ -163,11 +184,17 @@ def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_distilled_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_tiny_distilled_patch16_224"],
+        use_ssld=use_ssld)
     return model
 
 
-def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_small_distilled_patch16_224(pretrained=False,
+                                     use_ssld=False,
+                                     **kwargs):
     model = DistilledVisionTransformer(
         patch_size=16,
         embed_dim=384,
@@ -177,11 +204,16 @@ def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs)
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_distilled_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_small_distilled_patch16_224"],
+        use_ssld=use_ssld)
     return model
 
 
-def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False,
+                                    **kwargs):
     model = DistilledVisionTransformer(
         patch_size=16,
         embed_dim=768,
@@ -191,7 +223,11 @@ def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_distilled_patch16_224"],
+        use_ssld=use_ssld)
     return model
 
 
@@ -206,11 +242,16 @@ def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_patch16_384"],
+        use_ssld=use_ssld)
     return model
 
 
-def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs):
+def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False,
+                                    **kwargs):
     model = DistilledVisionTransformer(
         img_size=384,
         patch_size=16,
@@ -221,5 +262,9 @@ def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs):
         qkv_bias=True,
         epsilon=1e-6,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_distilled_patch16_384"],
+        use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/dla.py b/ppcls/arch/backbone/model_zoo/dla.py
index 51151710e..669055aeb 100644
--- a/ppcls/arch/backbone/model_zoo/dla.py
+++ b/ppcls/arch/backbone/model_zoo/dla.py
@@ -23,7 +23,6 @@ from paddle.nn.initializer import Normal, Constant
 from ppcls.arch.backbone.base.theseus_layer import Identity
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-
 MODEL_URLS = {
     "DLA34":
     "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams",
@@ -47,10 +46,8 @@ MODEL_URLS = {
     "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams"
 }
 
-
 __all__ = MODEL_URLS.keys()
 
-
 zeros_ = Constant(value=0.)
 ones_ = Constant(value=1.)
 
@@ -59,15 +56,23 @@ class DlaBasic(nn.Layer):
     def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
         super(DlaBasic, self).__init__()
         self.conv1 = nn.Conv2D(
-            inplanes, planes, kernel_size=3, stride=stride,
-            padding=dilation, bias_attr=False, dilation=dilation
-        )
+            inplanes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation)
         self.bn1 = nn.BatchNorm2D(planes)
         self.relu = nn.ReLU()
         self.conv2 = nn.Conv2D(
-            planes, planes, kernel_size=3, stride=1,
-            padding=dilation, bias_attr=False, dilation=dilation
-        )
+            planes,
+            planes,
+            kernel_size=3,
+            stride=1,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation)
         self.bn2 = nn.BatchNorm2D(planes)
         self.stride = stride
 
@@ -91,23 +96,34 @@ class DlaBasic(nn.Layer):
 class DlaBottleneck(nn.Layer):
     expansion = 2
 
-    def __init__(self, inplanes, outplanes, stride=1,
-                 dilation=1, cardinality=1, base_width=64):
+    def __init__(self,
+                 inplanes,
+                 outplanes,
+                 stride=1,
+                 dilation=1,
+                 cardinality=1,
+                 base_width=64):
         super(DlaBottleneck, self).__init__()
         self.stride = stride
-        mid_planes = int(math.floor(
-            outplanes * (base_width / 64)) * cardinality)
+        mid_planes = int(
+            math.floor(outplanes * (base_width / 64)) * cardinality)
         mid_planes = mid_planes // self.expansion
 
-        self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
+        self.conv1 = nn.Conv2D(
+            inplanes, mid_planes, kernel_size=1, bias_attr=False)
         self.bn1 = nn.BatchNorm2D(mid_planes)
         self.conv2 = nn.Conv2D(
-            mid_planes, mid_planes, kernel_size=3, 
-            stride=stride, padding=dilation, bias_attr=False, 
-            dilation=dilation, groups=cardinality
-        )
+            mid_planes,
+            mid_planes,
+            kernel_size=3,
+            stride=stride,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation,
+            groups=cardinality)
         self.bn2 = nn.BatchNorm2D(mid_planes)
-        self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
+        self.conv3 = nn.Conv2D(
+            mid_planes, outplanes, kernel_size=1, bias_attr=False)
         self.bn3 = nn.BatchNorm2D(outplanes)
         self.relu = nn.ReLU()
 
@@ -136,9 +152,12 @@ class DlaRoot(nn.Layer):
     def __init__(self, in_channels, out_channels, kernel_size, residual):
         super(DlaRoot, self).__init__()
         self.conv = nn.Conv2D(
-            in_channels, out_channels, 1, stride=1, 
-            bias_attr=False, padding=(kernel_size - 1) // 2
-        )
+            in_channels,
+            out_channels,
+            1,
+            stride=1,
+            bias_attr=False,
+            padding=(kernel_size - 1) // 2)
         self.bn = nn.BatchNorm2D(out_channels)
         self.relu = nn.ReLU()
         self.residual = residual
@@ -155,9 +174,18 @@ class DlaRoot(nn.Layer):
 
 
 class DlaTree(nn.Layer):
-    def __init__(self, levels, block, in_channels, out_channels, 
-                 stride=1,dilation=1, cardinality=1, base_width=64,
-                 level_root=False, root_dim=0, root_kernel_size=1, 
+    def __init__(self,
+                 levels,
+                 block,
+                 in_channels,
+                 out_channels,
+                 stride=1,
+                 dilation=1,
+                 cardinality=1,
+                 base_width=64,
+                 level_root=False,
+                 root_dim=0,
+                 root_kernel_size=1,
                  root_residual=False):
         super(DlaTree, self).__init__()
         if root_dim == 0:
@@ -168,28 +196,45 @@ class DlaTree(nn.Layer):
         self.downsample = nn.MaxPool2D(
             stride, stride=stride) if stride > 1 else Identity()
         self.project = Identity()
-        cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)
+        cargs = dict(
+            dilation=dilation, cardinality=cardinality, base_width=base_width)
 
         if levels == 1:
             self.tree1 = block(in_channels, out_channels, stride, **cargs)
             self.tree2 = block(out_channels, out_channels, 1, **cargs)
             if in_channels != out_channels:
                 self.project = nn.Sequential(
-                    nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
+                    nn.Conv2D(
+                        in_channels,
+                        out_channels,
+                        kernel_size=1,
+                        stride=1,
+                        bias_attr=False),
                     nn.BatchNorm2D(out_channels))
         else:
-            cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
+            cargs.update(
+                dict(
+                    root_kernel_size=root_kernel_size,
+                    root_residual=root_residual))
             self.tree1 = DlaTree(
-                levels - 1, block, in_channels, 
-                out_channels, stride, root_dim=0, **cargs
-            )
+                levels - 1,
+                block,
+                in_channels,
+                out_channels,
+                stride,
+                root_dim=0,
+                **cargs)
             self.tree2 = DlaTree(
-                levels - 1, block, out_channels, 
-                out_channels, root_dim=root_dim + out_channels, **cargs
-            )
+                levels - 1,
+                block,
+                out_channels,
+                out_channels,
+                root_dim=root_dim + out_channels,
+                **cargs)
 
         if levels == 1:
-            self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)
+            self.root = DlaRoot(root_dim, out_channels, root_kernel_size,
+                                root_residual)
 
         self.level_root = level_root
         self.root_dim = root_dim
@@ -214,12 +259,20 @@ class DlaTree(nn.Layer):
 
 
 class DLA(nn.Layer):
-    def __init__(self, levels, channels, in_chans=3, cardinality=1,
-                 base_width=64, block=DlaBottleneck, residual_root=False,
-                 drop_rate=0.0, class_dim=1000, with_pool=True):
+    def __init__(self,
+                 levels,
+                 channels,
+                 in_chans=3,
+                 cardinality=1,
+                 base_width=64,
+                 block=DlaBottleneck,
+                 residual_root=False,
+                 drop_rate=0.0,
+                 class_num=1000,
+                 with_pool=True):
         super(DLA, self).__init__()
         self.channels = channels
-        self.class_dim = class_dim
+        self.class_num = class_num
         self.with_pool = with_pool
         self.cardinality = cardinality
         self.base_width = base_width
@@ -227,46 +280,72 @@ class DLA(nn.Layer):
 
         self.base_layer = nn.Sequential(
             nn.Conv2D(
-                in_chans, channels[0], kernel_size=7,
-                stride=1, padding=3, bias_attr=False
-            ),
+                in_chans,
+                channels[0],
+                kernel_size=7,
+                stride=1,
+                padding=3,
+                bias_attr=False),
             nn.BatchNorm2D(channels[0]),
             nn.ReLU())
 
-        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
-        self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)
+        self.level0 = self._make_conv_level(channels[0], channels[0],
+                                            levels[0])
+        self.level1 = self._make_conv_level(
+            channels[0], channels[1], levels[1], stride=2)
 
         cargs = dict(
-            cardinality=cardinality, 
-            base_width=base_width, 
-            root_residual=residual_root
-        )
+            cardinality=cardinality,
+            base_width=base_width,
+            root_residual=residual_root)
 
         self.level2 = DlaTree(
-            levels[2], block, channels[1], 
-            channels[2], 2, level_root=False, **cargs
-        )
+            levels[2],
+            block,
+            channels[1],
+            channels[2],
+            2,
+            level_root=False,
+            **cargs)
         self.level3 = DlaTree(
-            levels[3], block, channels[2], 
-            channels[3], 2, level_root=True, **cargs
-        )
+            levels[3],
+            block,
+            channels[2],
+            channels[3],
+            2,
+            level_root=True,
+            **cargs)
         self.level4 = DlaTree(
-            levels[4], block, channels[3], 
-            channels[4], 2, level_root=True, **cargs
-        )
+            levels[4],
+            block,
+            channels[3],
+            channels[4],
+            2,
+            level_root=True,
+            **cargs)
         self.level5 = DlaTree(
-            levels[5], block, channels[4], 
-            channels[5], 2, level_root=True, **cargs
-        )
+            levels[5],
+            block,
+            channels[4],
+            channels[5],
+            2,
+            level_root=True,
+            **cargs)
 
         self.feature_info = [
             # rare to have a meaningful stride 1 level
-            dict(num_chs=channels[0], reduction=1, module='level0'),
-            dict(num_chs=channels[1], reduction=2, module='level1'),
-            dict(num_chs=channels[2], reduction=4, module='level2'),
-            dict(num_chs=channels[3], reduction=8, module='level3'),
-            dict(num_chs=channels[4], reduction=16, module='level4'),
-            dict(num_chs=channels[5], reduction=32, module='level5'),
+            dict(
+                num_chs=channels[0], reduction=1, module='level0'),
+            dict(
+                num_chs=channels[1], reduction=2, module='level1'),
+            dict(
+                num_chs=channels[2], reduction=4, module='level2'),
+            dict(
+                num_chs=channels[3], reduction=8, module='level3'),
+            dict(
+                num_chs=channels[4], reduction=16, module='level4'),
+            dict(
+                num_chs=channels[5], reduction=32, module='level5'),
         ]
 
         self.num_features = channels[-1]
@@ -274,8 +353,8 @@ class DLA(nn.Layer):
         if with_pool:
             self.global_pool = nn.AdaptiveAvgPool2D(1)
 
-        if class_dim > 0:
-            self.fc = nn.Conv2D(self.num_features, class_dim, 1)
+        if class_num > 0:
+            self.fc = nn.Conv2D(self.num_features, class_num, 1)
 
         for m in self.sublayers():
             if isinstance(m, nn.Conv2D):
@@ -291,12 +370,14 @@ class DLA(nn.Layer):
         for i in range(convs):
             modules.extend([
                 nn.Conv2D(
-                    inplanes, planes, kernel_size=3, 
+                    inplanes,
+                    planes,
+                    kernel_size=3,
                     stride=stride if i == 0 else 1,
-                    padding=dilation, bias_attr=False, dilation=dilation
-                ),
-                nn.BatchNorm2D(planes),
-                nn.ReLU()])
+                    padding=dilation,
+                    bias_attr=False,
+                    dilation=dilation), nn.BatchNorm2D(planes), nn.ReLU()
+            ])
             inplanes = planes
         return nn.Sequential(*modules)
 
@@ -321,7 +402,7 @@ class DLA(nn.Layer):
         if self.drop_rate > 0.:
             x = F.dropout(x, p=self.drop_rate, training=self.training)
 
-        if self.class_dim > 0:
+        if self.class_num > 0:
             x = self.fc(x)
             x = x.flatten(1)
 
@@ -342,124 +423,104 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 
 def DLA34(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 2, 1),
-        channels=(16, 32, 64, 128, 256, 512),
-        block=DlaBasic,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 128, 256, 512),
+                block=DlaBasic,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
     return model
 
 
 def DLA46_c(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 2, 1),
-        channels=(16, 32, 64, 64, 128, 256),
-        block=DlaBottleneck,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
     return model
 
 
 def DLA46x_c(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 2, 1),
-        channels=(16, 32, 64, 64, 128, 256),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
     return model
 
 
 def DLA60(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 3, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
     return model
 
 
 def DLA60x(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 3, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
     return model
 
 
 def DLA60x_c(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 2, 3, 1),
-        channels=(16, 32, 64, 64, 128, 256),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
     return model
 
 
 def DLA102(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 3, 4, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                residual_root=True,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
     return model
 
 
 def DLA102x(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 3, 4, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        cardinality=32,
-        base_width=4,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                residual_root=True,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
     return model
 
 
 def DLA102x2(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 1, 3, 4, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        cardinality=64,
-        base_width=4,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=64,
+                base_width=4,
+                residual_root=True,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
     return model
 
 
 def DLA169(pretrained=False, **kwargs):
-    model = DLA(
-        levels=(1, 1, 2, 3, 5, 1),
-        channels=(16, 32, 128, 256, 512, 1024),
-        block=DlaBottleneck,
-        residual_root=True,
-        **kwargs
-    )
+    model = DLA(levels=(1, 1, 2, 3, 5, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                residual_root=True,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
     return model
diff --git a/ppcls/arch/backbone/model_zoo/dpn.py b/ppcls/arch/backbone/model_zoo/dpn.py
index 7741eb7ce..55953ed20 100644
--- a/ppcls/arch/backbone/model_zoo/dpn.py
+++ b/ppcls/arch/backbone/model_zoo/dpn.py
@@ -29,12 +29,18 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"DPN68": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
-              "DPN92": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
-              "DPN98": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
-              "DPN107": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
-              "DPN131": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "DPN68":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
+    "DPN92":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
+    "DPN98":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
+    "DPN107":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
+    "DPN131":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -211,10 +217,10 @@ class DualPathFactory(nn.Layer):
 
 
 class DPN(nn.Layer):
-    def __init__(self, layers=68, class_dim=1000):
+    def __init__(self, layers=68, class_num=1000):
         super(DPN, self).__init__()
 
-        self._class_dim = class_dim
+        self._class_num = class_num
 
         args = self.get_net_args(layers)
         bws = args['bw']
@@ -309,7 +315,7 @@ class DPN(nn.Layer):
 
         self.out = Linear(
             out_channel,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_weights"),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -400,7 +406,8 @@ class DPN(nn.Layer):
         net_arg['init_padding'] = init_padding
 
         return net_arg
-    
+
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -411,7 +418,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     else:
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
-        )  
+        )
 
 
 def DPN68(pretrained=False, use_ssld=False, **kwargs):
@@ -441,4 +448,4 @@ def DPN107(pretrained=False, use_ssld=False, **kwargs):
 def DPN131(pretrained=False, use_ssld=False, **kwargs):
     model = DPN(layers=131, **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["DPN131"])
-    return model
\ No newline at end of file
+    return model
diff --git a/ppcls/arch/backbone/model_zoo/efficientnet.py b/ppcls/arch/backbone/model_zoo/efficientnet.py
index de2d52459..22b7fd1d8 100644
--- a/ppcls/arch/backbone/model_zoo/efficientnet.py
+++ b/ppcls/arch/backbone/model_zoo/efficientnet.py
@@ -11,16 +11,26 @@ import copy
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"EfficientNetB0_small":  "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
-              "EfficientNetB0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
-              "EfficientNetB1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
-              "EfficientNetB2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
-              "EfficientNetB3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
-              "EfficientNetB4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
-              "EfficientNetB5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
-              "EfficientNetB6": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
-              "EfficientNetB7": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "EfficientNetB0_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
+    "EfficientNetB0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
+    "EfficientNetB1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
+    "EfficientNetB2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
+    "EfficientNetB3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
+    "EfficientNetB4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
+    "EfficientNetB5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
+    "EfficientNetB6":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
+    "EfficientNetB7":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -725,7 +735,7 @@ class EfficientNet(nn.Layer):
                  padding_type="SAME",
                  override_params=None,
                  use_se=True,
-                 class_dim=1000):
+                 class_num=1000):
         super(EfficientNet, self).__init__()
 
         model_name = 'efficientnet-' + name
@@ -778,7 +788,7 @@ class EfficientNet(nn.Layer):
         param_attr, bias_attr = init_fc_layer("_fc")
         self._fc = Linear(
             output_channels,
-            class_dim,
+            class_num,
             weight_attr=param_attr,
             bias_attr=bias_attr)
 
@@ -792,7 +802,7 @@ class EfficientNet(nn.Layer):
         x = self._fc(x)
         return x
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -803,14 +813,14 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     else:
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
-        )  
+        )
 
 
 def EfficientNetB0_small(padding_type='DYNAMIC',
                          override_params=None,
                          use_se=False,
-                         pretrained=False, 
-                         use_ssld=False, 
+                         pretrained=False,
+                         use_ssld=False,
                          **kwargs):
     model = EfficientNet(
         name='b0',
@@ -825,8 +835,8 @@ def EfficientNetB0_small(padding_type='DYNAMIC',
 def EfficientNetB0(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b0',
@@ -841,8 +851,8 @@ def EfficientNetB0(padding_type='SAME',
 def EfficientNetB1(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b1',
@@ -857,8 +867,8 @@ def EfficientNetB1(padding_type='SAME',
 def EfficientNetB2(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b2',
@@ -873,8 +883,8 @@ def EfficientNetB2(padding_type='SAME',
 def EfficientNetB3(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b3',
@@ -889,8 +899,8 @@ def EfficientNetB3(padding_type='SAME',
 def EfficientNetB4(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b4',
@@ -905,8 +915,8 @@ def EfficientNetB4(padding_type='SAME',
 def EfficientNetB5(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b5',
@@ -921,8 +931,8 @@ def EfficientNetB5(padding_type='SAME',
 def EfficientNetB6(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b6',
@@ -937,8 +947,8 @@ def EfficientNetB6(padding_type='SAME',
 def EfficientNetB7(padding_type='SAME',
                    override_params=None,
                    use_se=True,
-                   pretrained=False, 
-                   use_ssld=False, 
+                   pretrained=False,
+                   use_ssld=False,
                    **kwargs):
     model = EfficientNet(
         name='b7',
@@ -947,4 +957,4 @@ def EfficientNetB7(padding_type='SAME',
         use_se=use_se,
         **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"])
-    return model
\ No newline at end of file
+    return model
diff --git a/ppcls/arch/backbone/model_zoo/ghostnet.py b/ppcls/arch/backbone/model_zoo/ghostnet.py
index e557e0f9f..4a16d8a57 100644
--- a/ppcls/arch/backbone/model_zoo/ghostnet.py
+++ b/ppcls/arch/backbone/model_zoo/ghostnet.py
@@ -23,10 +23,14 @@ from paddle.nn.initializer import Uniform, KaimingNormal
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"GhostNet_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
-              "GhostNet_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
-              "GhostNet_x1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "GhostNet_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
+    "GhostNet_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
+    "GhostNet_x1_3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -215,7 +219,7 @@ class GhostBottleneck(nn.Layer):
 
 
 class GhostNet(nn.Layer):
-    def __init__(self, scale, class_dim=1000):
+    def __init__(self, scale, class_num=1000):
         super(GhostNet, self).__init__()
         self.cfgs = [
             # k, t, c, SE, s
@@ -290,7 +294,7 @@ class GhostNet(nn.Layer):
         stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0)
         self.fc_1 = Linear(
             self._fc0_output_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 name="fc_1_weights", initializer=Uniform(-stdv, stdv)),
             bias_attr=ParamAttr(name="fc_1_offset"))
@@ -322,7 +326,7 @@ class GhostNet(nn.Layer):
             new_v += divisor
         return new_v
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -338,17 +342,20 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
     model = GhostNet(scale=0.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
     return model
 
 
 def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
     model = GhostNet(scale=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
     return model
 
 
 def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs):
     model = GhostNet(scale=1.3, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/googlenet.py b/ppcls/arch/backbone/model_zoo/googlenet.py
index 7ef35a964..00b7feeb9 100644
--- a/ppcls/arch/backbone/model_zoo/googlenet.py
+++ b/ppcls/arch/backbone/model_zoo/googlenet.py
@@ -10,8 +10,10 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"GoogLeNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "GoogLeNet":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -101,7 +103,7 @@ class Inception(nn.Layer):
 
 
 class GoogLeNetDY(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
         super(GoogLeNetDY, self).__init__()
         self._conv = ConvLayer(3, 64, 7, 2, name="conv1")
         self._pool = MaxPool2D(kernel_size=3, stride=2)
@@ -134,7 +136,7 @@ class GoogLeNetDY(nn.Layer):
         self._drop = Dropout(p=0.4, mode="downscale_in_infer")
         self._fc_out = Linear(
             1024,
-            class_dim,
+            class_num,
             weight_attr=xavier(1024, 1, "out"),
             bias_attr=ParamAttr(name="out_offset"))
         self._pool_o1 = AvgPool2D(kernel_size=5, stride=3)
@@ -147,7 +149,7 @@ class GoogLeNetDY(nn.Layer):
         self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer")
         self._out1 = Linear(
             1024,
-            class_dim,
+            class_num,
             weight_attr=xavier(1024, 1, "out1"),
             bias_attr=ParamAttr(name="out1_offset"))
         self._pool_o2 = AvgPool2D(kernel_size=5, stride=3)
@@ -160,7 +162,7 @@ class GoogLeNetDY(nn.Layer):
         self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer")
         self._out2 = Linear(
             1024,
-            class_dim,
+            class_num,
             weight_attr=xavier(1024, 1, "out2"),
             bias_attr=ParamAttr(name="out2_offset"))
 
@@ -205,8 +207,8 @@ class GoogLeNetDY(nn.Layer):
         x = self._drop_o2(x)
         out2 = self._out2(x)
         return [out, out1, out2]
-    
-    
+
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -222,5 +224,6 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def GoogLeNet(pretrained=False, use_ssld=False, **kwargs):
     model = GoogLeNetDY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/gvt.py b/ppcls/arch/backbone/model_zoo/gvt.py
index 659be4964..8453cc27a 100644
--- a/ppcls/arch/backbone/model_zoo/gvt.py
+++ b/ppcls/arch/backbone/model_zoo/gvt.py
@@ -25,18 +25,23 @@ from .vision_transformer import Block as ViTBlock
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "pcpvt_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams",
-              "pcpvt_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams",
-              "pcpvt_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams",
-              "alt_gvt_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams",
-              "alt_gvt_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams",
-              "alt_gvt_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams"
-             }
+    "pcpvt_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams",
+    "pcpvt_base":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams",
+    "pcpvt_large":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams",
+    "alt_gvt_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams",
+    "alt_gvt_base":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams",
+    "alt_gvt_large":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
 
-
 class GroupAttention(nn.Layer):
     """LSA: self attention within a group.
     """
@@ -522,7 +527,7 @@ class ALTGVT(PCPVT):
                  img_size=224,
                  patch_size=4,
                  in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                  embed_dims=[64, 128, 256],
                  num_heads=[1, 2, 4],
                  mlp_ratios=[4, 4, 4],
@@ -536,7 +541,7 @@ class ALTGVT(PCPVT):
                  sr_ratios=[4, 2, 1],
                  block_cls=GroupBlock,
                  wss=[7, 7, 7]):
-        super().__init__(img_size, patch_size, in_chans, class_dim, embed_dims,
+        super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
                          num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
                          attn_drop_rate, drop_path_rate, norm_layer, depths,
                          sr_ratios, block_cls)
@@ -568,6 +573,7 @@ class ALTGVT(PCPVT):
             cur += depths[k]
         self.apply(self._init_weights)
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -593,7 +599,8 @@ def pcpvt_small(pretrained=False, use_ssld=False, **kwargs):
         depths=[3, 4, 6, 3],
         sr_ratios=[8, 4, 2, 1],
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld)
     return model
 
 
@@ -609,7 +616,8 @@ def pcpvt_base(pretrained=False, use_ssld=False, **kwargs):
         depths=[3, 4, 18, 3],
         sr_ratios=[8, 4, 2, 1],
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld)
     return model
 
 
@@ -625,7 +633,8 @@ def pcpvt_large(pretrained=False, use_ssld=False, **kwargs):
         depths=[3, 8, 27, 3],
         sr_ratios=[8, 4, 2, 1],
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld)
     return model
 
 
@@ -642,7 +651,8 @@ def alt_gvt_small(pretrained=False, use_ssld=False, **kwargs):
         wss=[7, 7, 7, 7],
         sr_ratios=[8, 4, 2, 1],
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld)
     return model
 
 
@@ -659,7 +669,8 @@ def alt_gvt_base(pretrained=False, use_ssld=False, **kwargs):
         wss=[7, 7, 7, 7],
         sr_ratios=[8, 4, 2, 1],
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld)
     return model
 
 
@@ -676,5 +687,6 @@ def alt_gvt_large(pretrained=False, use_ssld=False, **kwargs):
         wss=[7, 7, 7, 7],
         sr_ratios=[8, 4, 2, 1],
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/hardnet.py b/ppcls/arch/backbone/model_zoo/hardnet.py
index b3d5f9a45..112dc3dd8 100644
--- a/ppcls/arch/backbone/model_zoo/hardnet.py
+++ b/ppcls/arch/backbone/model_zoo/hardnet.py
@@ -17,7 +17,6 @@ import paddle.nn as nn
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-
 MODEL_URLS = {
     'HarDNet39_ds':
     'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams',
@@ -29,51 +28,70 @@ MODEL_URLS = {
     'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams'
 }
 
-
 __all__ = MODEL_URLS.keys()
 
 
-def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
+def ConvLayer(in_channels,
+              out_channels,
+              kernel_size=3,
+              stride=1,
+              bias_attr=False):
     layer = nn.Sequential(
         ('conv', nn.Conv2D(
-            in_channels, out_channels, kernel_size=kernel_size,
-            stride=stride, padding=kernel_size//2, groups=1, bias_attr=bias_attr
-        )),
-        ('norm', nn.BatchNorm2D(out_channels)),
-        ('relu', nn.ReLU6())
-    )
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=kernel_size // 2,
+            groups=1,
+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)),
+        ('relu', nn.ReLU6()))
     return layer
 
 
-def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
+def DWConvLayer(in_channels,
+                out_channels,
+                kernel_size=3,
+                stride=1,
+                bias_attr=False):
     layer = nn.Sequential(
         ('dwconv', nn.Conv2D(
-            in_channels, out_channels, kernel_size=kernel_size,
-            stride=stride, padding=1, groups=out_channels, bias_attr=bias_attr
-        )),
-        ('norm', nn.BatchNorm2D(out_channels))
-    )
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=1,
+            groups=out_channels,
+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)))
     return layer
 
 
 def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
     layer = nn.Sequential(
-        ('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
-        ('layer2', DWConvLayer(out_channels, out_channels, stride=stride))
-    )
+        ('layer1', ConvLayer(
+            in_channels, out_channels, kernel_size=kernel_size)),
+        ('layer2', DWConvLayer(
+            out_channels, out_channels, stride=stride)))
     return layer
 
 
 class HarDBlock(nn.Layer):
-    def __init__(self, in_channels, growth_rate, grmul, n_layers, 
-                 keepBase=False, residual_out=False, dwconv=False):
+    def __init__(self,
+                 in_channels,
+                 growth_rate,
+                 grmul,
+                 n_layers,
+                 keepBase=False,
+                 residual_out=False,
+                 dwconv=False):
         super().__init__()
         self.keepBase = keepBase
         self.links = []
         layers_ = []
         self.out_channels = 0  # if upsample else in_channels
         for i in range(n_layers):
-            outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
+            outch, inch, link = self.get_link(i + 1, in_channels, growth_rate,
+                                              grmul)
             self.links.append(link)
             if dwconv:
                 layers_.append(CombConvLayer(inch, outch))
@@ -92,7 +110,7 @@ class HarDBlock(nn.Layer):
 
         link = []
         for i in range(10):
-            dv = 2 ** i
+            dv = 2**i
             if layer % dv == 0:
                 k = layer - dv
                 link.append(k)
@@ -126,7 +144,7 @@ class HarDBlock(nn.Layer):
         t = len(layers_)
         out_ = []
         for i in range(t):
-            if (i == 0 and self.keepBase) or (i == t-1) or (i % 2 == 1):
+            if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
                 out_.append(layers_[i])
         out = paddle.concat(out_, 1)
 
@@ -134,8 +152,11 @@ class HarDBlock(nn.Layer):
 
 
 class HarDNet(nn.Layer):
-    def __init__(self, depth_wise=False, arch=85,
-                 class_dim=1000, with_pool=True):
+    def __init__(self,
+                 depth_wise=False,
+                 arch=85,
+                 class_num=1000,
+                 with_pool=True):
         super().__init__()
         first_ch = [32, 64]
         second_kernel = 3
@@ -146,16 +167,16 @@ class HarDNet(nn.Layer):
         # HarDNet68
         ch_list = [128, 256, 320, 640, 1024]
         gr = [14, 16, 20, 40, 160]
-        n_layers = [8, 16, 16, 16,  4]
-        downSamp = [1,  0,  1,  1,  0]
+        n_layers = [8, 16, 16, 16, 4]
+        downSamp = [1, 0, 1, 1, 0]
 
         if arch == 85:
             # HarDNet85
             first_ch = [48, 96]
             ch_list = [192, 256, 320, 480, 720, 1280]
-            gr = [24,  24,  28,  36,  48, 256]
-            n_layers = [8,  16,  16,  16,  16,   4]
-            downSamp = [1,   0,   1,   0,   1,   0]
+            gr = [24, 24, 28, 36, 48, 256]
+            n_layers = [8, 16, 16, 16, 16, 4]
+            downSamp = [1, 0, 1, 0, 1, 0]
             drop_rate = 0.2
 
         elif arch == 39:
@@ -163,9 +184,9 @@ class HarDNet(nn.Layer):
             first_ch = [24, 48]
             ch_list = [96, 320, 640, 1024]
             grmul = 1.6
-            gr = [16,  20, 64, 160]
-            n_layers = [4,  16,  8,   4]
-            downSamp = [1,   1,  1,   0]
+            gr = [16, 20, 64, 160]
+            n_layers = [4, 16, 8, 4]
+            downSamp = [1, 1, 1, 0]
 
         if depth_wise:
             second_kernel = 1
@@ -177,12 +198,17 @@ class HarDNet(nn.Layer):
 
         # First Layer: Standard Conv3x3, Stride=2
         self.base.append(
-            ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3,
-                      stride=2, bias_attr=False))
+            ConvLayer(
+                in_channels=3,
+                out_channels=first_ch[0],
+                kernel_size=3,
+                stride=2,
+                bias_attr=False))
 
         # Second Layer
         self.base.append(
-            ConvLayer(first_ch[0], first_ch[1],  kernel_size=second_kernel))
+            ConvLayer(
+                first_ch[0], first_ch[1], kernel_size=second_kernel))
 
         # Maxpooling or DWConv3x3 downsampling
         if max_pool:
@@ -197,7 +223,7 @@ class HarDNet(nn.Layer):
             ch = blk.out_channels
             self.base.append(blk)
 
-            if i == blks-1 and arch == 85:
+            if i == blks - 1 and arch == 85:
                 self.base.append(nn.Dropout(0.1))
 
             self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
@@ -208,17 +234,17 @@ class HarDNet(nn.Layer):
                 else:
                     self.base.append(DWConvLayer(ch, ch, stride=2))
 
-        ch = ch_list[blks-1]
+        ch = ch_list[blks - 1]
 
         layers = []
 
         if with_pool:
             layers.append(nn.AdaptiveAvgPool2D((1, 1)))
 
-        if class_dim > 0:
+        if class_num > 0:
             layers.append(nn.Flatten())
             layers.append(nn.Dropout(drop_rate))
-            layers.append(nn.Linear(ch, class_dim))
+            layers.append(nn.Linear(ch, class_num))
 
         self.base.append(nn.Sequential(*layers))
 
diff --git a/ppcls/arch/backbone/model_zoo/inception_v4.py b/ppcls/arch/backbone/model_zoo/inception_v4.py
index 37cef5c20..e0460d48b 100644
--- a/ppcls/arch/backbone/model_zoo/inception_v4.py
+++ b/ppcls/arch/backbone/model_zoo/inception_v4.py
@@ -23,7 +23,10 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"InceptionV4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"}
+MODEL_URLS = {
+    "InceptionV4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -392,7 +395,7 @@ class InceptionC(nn.Layer):
 
 
 class InceptionV4DY(nn.Layer):
-    def __init__(self, class_dim=1000):
+    def __init__(self, class_num=1000):
         super(InceptionV4DY, self).__init__()
         self._inception_stem = InceptionStem()
 
@@ -420,7 +423,7 @@ class InceptionV4DY(nn.Layer):
         stdv = 1.0 / math.sqrt(1536 * 1.0)
         self.out = Linear(
             1536,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="final_fc_weights"),
             bias_attr=ParamAttr(name="final_fc_offset"))
@@ -466,7 +469,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
 
+
 def InceptionV4(pretrained=False, use_ssld=False, **kwargs):
     model = InceptionV4DY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/levit.py b/ppcls/arch/backbone/model_zoo/levit.py
index bb74e00c6..78d013d65 100644
--- a/ppcls/arch/backbone/model_zoo/levit.py
+++ b/ppcls/arch/backbone/model_zoo/levit.py
@@ -27,12 +27,17 @@ from .vision_transformer import trunc_normal_, zeros_, ones_, Identity
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "LeViT_128S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
-              "LeViT_128": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
-              "LeViT_192": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
-              "LeViT_256": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
-              "LeViT_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
-             }
+    "LeViT_128S":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
+    "LeViT_128":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
+    "LeViT_192":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
+    "LeViT_256":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
+    "LeViT_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -326,7 +331,7 @@ class LeViT(nn.Layer):
                  img_size=224,
                  patch_size=16,
                  in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                  embed_dim=[192],
                  key_dim=[64],
                  depth=[12],
@@ -341,7 +346,7 @@ class LeViT(nn.Layer):
                  drop_path=0):
         super().__init__()
 
-        self.class_dim = class_dim
+        self.class_num = class_num
         self.num_features = embed_dim[-1]
         self.embed_dim = embed_dim
         self.distillation = distillation
@@ -403,10 +408,10 @@ class LeViT(nn.Layer):
 
         # Classifier head
         self.head = BN_Linear(embed_dim[-1],
-                              class_dim) if class_dim > 0 else Identity()
+                              class_num) if class_num > 0 else Identity()
         if distillation:
             self.head_dist = BN_Linear(
-                embed_dim[-1], class_dim) if class_dim > 0 else Identity()
+                embed_dim[-1], class_num) if class_num > 0 else Identity()
 
     def forward(self, x):
         x = self.patch_embed(x)
@@ -423,7 +428,7 @@ class LeViT(nn.Layer):
         return x
 
 
-def model_factory(C, D, X, N, drop_path, class_dim, distillation):
+def model_factory(C, D, X, N, drop_path, class_num, distillation):
     embed_dim = [int(x) for x in C.split('_')]
     num_heads = [int(x) for x in N.split('_')]
     depth = [int(x) for x in X.split('_')]
@@ -444,7 +449,7 @@ def model_factory(C, D, X, N, drop_path, class_dim, distillation):
         attention_activation=act,
         mlp_activation=act,
         hybrid_backbone=b16(embed_dim[0], activation=act),
-        class_dim=class_dim,
+        class_num=class_num,
         drop_path=drop_path,
         distillation=distillation)
 
@@ -489,6 +494,7 @@ specification = {
     },
 }
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -502,46 +508,71 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         )
 
 
-def LeViT_128S(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_128S(pretrained=False,
+               use_ssld=False,
+               class_num=1000,
+               distillation=False,
+               **kwargs):
     model = model_factory(
         **specification['LeViT_128S'],
-        class_dim=class_dim,
+        class_num=class_num,
         distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
     return model
 
 
-def LeViT_128(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_128(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
     model = model_factory(
         **specification['LeViT_128'],
-        class_dim=class_dim,
+        class_num=class_num,
         distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
     return model
 
 
-def LeViT_192(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_192(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
     model = model_factory(
         **specification['LeViT_192'],
-        class_dim=class_dim,
+        class_num=class_num,
         distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
     return model
 
 
-def LeViT_256(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_256(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
     model = model_factory(
         **specification['LeViT_256'],
-        class_dim=class_dim,
+        class_num=class_num,
         distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
     return model
 
 
-def LeViT_384(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
+def LeViT_384(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
     model = model_factory(
         **specification['LeViT_384'],
-        class_dim=class_dim,
+        class_num=class_num,
         distillation=distillation)
-    _load_pretrained(pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/mixnet.py b/ppcls/arch/backbone/model_zoo/mixnet.py
index 13582acb8..db460173d 100644
--- a/ppcls/arch/backbone/model_zoo/mixnet.py
+++ b/ppcls/arch/backbone/model_zoo/mixnet.py
@@ -25,9 +25,14 @@ import paddle.nn as nn
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"MixNet_S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams", 
-              "MixNet_M": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams", 
-              "MixNet_L": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"}
+MODEL_URLS = {
+    "MixNet_S":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams",
+    "MixNet_M":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams",
+    "MixNet_L":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -617,7 +622,7 @@ class MixNet(nn.Layer):
         Number of input channels.
     in_size : tuple of two ints, default (224, 224)
         Spatial size of the expected input image.
-    class_dim : int, default 1000
+    class_num : int, default 1000
         Number of classification classes.
     """
 
@@ -632,10 +637,10 @@ class MixNet(nn.Layer):
                  se_factors,
                  in_channels=3,
                  in_size=(224, 224),
-                 class_dim=1000):
+                 class_num=1000):
         super(MixNet, self).__init__()
         self.in_size = in_size
-        self.class_dim = class_dim
+        self.class_num = class_num
 
         self.features = nn.Sequential()
         self.features.add_sublayer(
@@ -687,7 +692,7 @@ class MixNet(nn.Layer):
                 kernel_size=7, stride=1))
 
         self.output = nn.Linear(
-            in_features=in_channels, out_features=class_dim)
+            in_features=in_channels, out_features=class_num)
 
     def forward(self, x):
         x = self.features(x)
@@ -773,9 +778,11 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
 
+
 def MixNet_S(pretrained=False, use_ssld=False, **kwargs):
     model = InceptionV4DY(**kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
     return model
 
 
@@ -786,7 +793,8 @@ def MixNet_S(**kwargs):
     """
     model = get_mixnet(
         version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
     return model
 
 
@@ -797,7 +805,8 @@ def MixNet_M(**kwargs):
     """
     model = get_mixnet(
         version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
     return model
 
 
@@ -808,6 +817,6 @@ def MixNet_L(**kwargs):
     """
     model = get_mixnet(
         version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
     return model
-
diff --git a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
index 4cafd1461..b32c0250b 100644
--- a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
+++ b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
@@ -28,12 +28,20 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"MobileNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams", 
-              "MobileNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams", 
-              "MobileNetV2_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
-              "MobileNetV2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
-              "MobileNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
-              "MobileNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"}
+MODEL_URLS = {
+    "MobileNetV2_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
+    "MobileNetV2_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
+    "MobileNetV2_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
+    "MobileNetV2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
+    "MobileNetV2_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
+    "MobileNetV2_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -155,10 +163,10 @@ class InvresiBlocks(nn.Layer):
 
 
 class MobileNet(nn.Layer):
-    def __init__(self, class_dim=1000, scale=1.0, prefix_name=""):
+    def __init__(self, class_num=1000, scale=1.0, prefix_name=""):
         super(MobileNet, self).__init__()
         self.scale = scale
-        self.class_dim = class_dim
+        self.class_num = class_num
 
         bottleneck_params_list = [
             (1, 16, 1, 1),
@@ -209,7 +217,7 @@ class MobileNet(nn.Layer):
 
         self.out = Linear(
             self.out_c,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(name=prefix_name + "fc10_weights"),
             bias_attr=ParamAttr(name=prefix_name + "fc10_offset"))
 
@@ -222,8 +230,8 @@ class MobileNet(nn.Layer):
         y = paddle.flatten(y, start_axis=1, stop_axis=-1)
         y = self.out(y)
         return y
-    
-    
+
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -235,39 +243,45 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
     model = MobileNet(scale=0.25, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
     return model
 
 
 def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
     model = MobileNet(scale=0.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
     return model
 
 
 def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs):
     model = MobileNet(scale=0.75, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
     return model
 
 
 def MobileNetV2(pretrained=False, use_ssld=False, **kwargs):
     model = MobileNet(scale=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
     return model
 
 
 def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
     model = MobileNet(scale=1.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
     return model
 
 
 def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
     model = MobileNet(scale=2.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/rednet.py b/ppcls/arch/backbone/model_zoo/rednet.py
index a113a32ac..12802d59c 100644
--- a/ppcls/arch/backbone/model_zoo/rednet.py
+++ b/ppcls/arch/backbone/model_zoo/rednet.py
@@ -19,7 +19,6 @@ from paddle.vision.models import resnet
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-
 MODEL_URLS = {
     "RedNet26":
     "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams",
@@ -33,7 +32,6 @@ MODEL_URLS = {
     "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams"
 }
 
-
 __all__ = MODEL_URLS.keys()
 
 
@@ -51,50 +49,53 @@ class Involution(nn.Layer):
                 in_channels=channels,
                 out_channels=channels // reduction_ratio,
                 kernel_size=1,
-                bias_attr=False
-            )),
+                bias_attr=False)),
             ('bn', nn.BatchNorm2D(channels // reduction_ratio)),
-            ('activate', nn.ReLU())
-        )
-        self.conv2 = nn.Sequential(
-            ('conv', nn.Conv2D(
-                in_channels=channels // reduction_ratio,
-                out_channels=kernel_size**2 * self.groups,
-                kernel_size=1,
-                stride=1
-            ))
-        )
+            ('activate', nn.ReLU()))
+        self.conv2 = nn.Sequential(('conv', nn.Conv2D(
+            in_channels=channels // reduction_ratio,
+            out_channels=kernel_size**2 * self.groups,
+            kernel_size=1,
+            stride=1)))
         if stride > 1:
             self.avgpool = nn.AvgPool2D(stride, stride)
 
     def forward(self, x):
-        weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
+        weight = self.conv2(
+            self.conv1(x if self.stride == 1 else self.avgpool(x)))
         b, c, h, w = weight.shape
-        weight = weight.reshape((b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
+        weight = weight.reshape(
+            (b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
 
-        out = nn.functional.unfold(x, self.kernel_size, self.stride, (self.kernel_size-1)//2, 1)
-        out = out.reshape((b, self.groups, self.group_channels, self.kernel_size**2, h, w))
+        out = nn.functional.unfold(x, self.kernel_size, self.stride,
+                                   (self.kernel_size - 1) // 2, 1)
+        out = out.reshape(
+            (b, self.groups, self.group_channels, self.kernel_size**2, h, w))
         out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
         return out
 
 
 class BottleneckBlock(resnet.BottleneckBlock):
-    def __init__(self, inplanes, planes, stride=1, downsample=None, 
-                 groups=1, base_width=64, dilation=1, norm_layer=None):
-        super(BottleneckBlock, self).__init__(
-            inplanes, planes, stride, downsample, 
-            groups, base_width, dilation, norm_layer
-        )
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 groups=1,
+                 base_width=64,
+                 dilation=1,
+                 norm_layer=None):
+        super(BottleneckBlock, self).__init__(inplanes, planes, stride,
+                                              downsample, groups, base_width,
+                                              dilation, norm_layer)
         width = int(planes * (base_width / 64.)) * groups
         self.conv2 = Involution(width, 7, stride)
 
 
 class RedNet(resnet.ResNet):
-    def __init__(self, block, depth, class_dim=1000, with_pool=True):
+    def __init__(self, block, depth, class_num=1000, with_pool=True):
         super(RedNet, self).__init__(
-            block=block, depth=50, 
-            num_classes=class_dim, with_pool=with_pool
-        )
+            block=block, depth=50, num_classes=class_num, with_pool=with_pool)
         layer_cfg = {
             26: [1, 2, 4, 1],
             38: [2, 3, 5, 2],
@@ -108,7 +109,7 @@ class RedNet(resnet.ResNet):
         self.bn1 = None
         self.relu = None
         self.inplanes = 64
-        self.class_dim = class_dim
+        self.class_num = class_num
         self.stem = nn.Sequential(
             nn.Sequential(
                 ('conv', nn.Conv2D(
@@ -117,11 +118,9 @@ class RedNet(resnet.ResNet):
                     kernel_size=3,
                     stride=2,
                     padding=1,
-                    bias_attr=False
-                )),
+                    bias_attr=False)),
                 ('bn', nn.BatchNorm2D(self.inplanes // 2)),
-                ('activate', nn.ReLU())
-            ),
+                ('activate', nn.ReLU())),
             Involution(self.inplanes // 2, 3, 1),
             nn.BatchNorm2D(self.inplanes // 2),
             nn.ReLU(),
@@ -132,12 +131,8 @@ class RedNet(resnet.ResNet):
                     kernel_size=3,
                     stride=1,
                     padding=1,
-                    bias_attr=False
-                )),
-                ('bn', nn.BatchNorm2D(self.inplanes)),
-                ('activate', nn.ReLU())
-            )
-        )
+                    bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes)),
+                ('activate', nn.ReLU())))
 
         self.layer1 = self._make_layer(block, 64, layers[0])
         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
@@ -156,7 +151,7 @@ class RedNet(resnet.ResNet):
         if self.with_pool:
             x = self.avgpool(x)
 
-        if self.class_dim > 0:
+        if self.class_num > 0:
             x = paddle.flatten(x, 1)
             x = self.fc(x)
 
diff --git a/ppcls/arch/backbone/model_zoo/regnet.py b/ppcls/arch/backbone/model_zoo/regnet.py
index 86802ee7e..549bd1617 100644
--- a/ppcls/arch/backbone/model_zoo/regnet.py
+++ b/ppcls/arch/backbone/model_zoo/regnet.py
@@ -28,13 +28,20 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"RegNetX_200MF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams", 
-              "RegNetX_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams", 
-              "RegNetX_32GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams", 
-              "RegNetY_200MF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams", 
-              "RegNetY_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams", 
-              "RegNetY_32GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams", 
-             }
+MODEL_URLS = {
+    "RegNetX_200MF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams",
+    "RegNetX_4GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams",
+    "RegNetX_32GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams",
+    "RegNetY_200MF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams",
+    "RegNetY_4GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams",
+    "RegNetY_32GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -235,7 +242,7 @@ class RegNet(nn.Layer):
                  bot_mul,
                  q=8,
                  se_on=False,
-                 class_dim=1000):
+                 class_num=1000):
         super(RegNet, self).__init__()
 
         # Generate RegNet ws per block
@@ -301,7 +308,7 @@ class RegNet(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
             bias_attr=ParamAttr(name="fc_0.b_0"))
@@ -315,7 +322,7 @@ class RegNet(nn.Layer):
         y = self.out(y)
         return y
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -327,12 +334,20 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
-    
+
+
 def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs):
     model = RegNet(
-        w_a=36.44, w_0=24, w_m=2.49, d=13, group_w=8, bot_mul=1.0, q=8, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
+        w_a=36.44,
+        w_0=24,
+        w_m=2.49,
+        d=13,
+        group_w=8,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
     return model
 
 
@@ -346,7 +361,8 @@ def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
         bot_mul=1.0,
         q=8,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
     return model
 
 
@@ -360,7 +376,8 @@ def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
         bot_mul=1.0,
         q=8,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
     return model
 
 
@@ -375,7 +392,8 @@ def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs):
         q=8,
         se_on=True,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_200MF"], use_ssld=use_ssld)
     return model
 
 
@@ -390,7 +408,8 @@ def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs):
         q=8,
         se_on=True,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_4GF"], use_ssld=use_ssld)
     return model
 
 
@@ -405,5 +424,6 @@ def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs):
         q=8,
         se_on=True,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_32GF"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/repvgg.py b/ppcls/arch/backbone/model_zoo/repvgg.py
index 2447fbe25..94b9355ea 100644
--- a/ppcls/arch/backbone/model_zoo/repvgg.py
+++ b/ppcls/arch/backbone/model_zoo/repvgg.py
@@ -4,24 +4,37 @@ import numpy as np
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"RepVGG_A0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams",
-              "RepVGG_A1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams",
-              "RepVGG_A2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams",
-              "RepVGG_B0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams",
-              "RepVGG_B1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams",
-              "RepVGG_B2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams",
-              "RepVGG_B3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams",
-              "RepVGG_B1g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams",
-              "RepVGG_B1g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
-              "RepVGG_B2g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams",
-              "RepVGG_B2g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
-              "RepVGG_B3g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams",
-              "RepVGG_B3g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "RepVGG_A0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams",
+    "RepVGG_A1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams",
+    "RepVGG_A2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams",
+    "RepVGG_B0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams",
+    "RepVGG_B1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams",
+    "RepVGG_B2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams",
+    "RepVGG_B3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams",
+    "RepVGG_B1g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams",
+    "RepVGG_B1g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
+    "RepVGG_B2g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams",
+    "RepVGG_B2g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
+    "RepVGG_B3g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams",
+    "RepVGG_B3g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
-
 optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
 g2_map = {l: 2 for l in optional_groupwise_layers}
 g4_map = {l: 4 for l in optional_groupwise_layers}
@@ -174,7 +187,7 @@ class RepVGG(nn.Layer):
                  num_blocks,
                  width_multiplier=None,
                  override_groups_map=None,
-                 class_dim=1000):
+                 class_num=1000):
         super(RepVGG, self).__init__()
 
         assert len(width_multiplier) == 4
@@ -200,7 +213,7 @@ class RepVGG(nn.Layer):
         self.stage4 = self._make_stage(
             int(512 * width_multiplier[3]), num_blocks[3], stride=2)
         self.gap = nn.AdaptiveAvgPool2D(output_size=1)
-        self.linear = nn.Linear(int(512 * width_multiplier[3]), class_dim)
+        self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num)
 
     def _make_stage(self, planes, num_blocks, stride):
         strides = [stride] + [1] * (num_blocks - 1)
@@ -248,7 +261,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs):
     model = RepVGG(
@@ -256,7 +269,8 @@ def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[0.75, 0.75, 0.75, 2.5],
         override_groups_map=None,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld)
     return model
 
 
@@ -266,7 +280,8 @@ def RepVGG_A1(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[1, 1, 1, 2.5],
         override_groups_map=None,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld)
     return model
 
 
@@ -276,7 +291,8 @@ def RepVGG_A2(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[1.5, 1.5, 1.5, 2.75],
         override_groups_map=None,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld)
     return model
 
 
@@ -286,7 +302,8 @@ def RepVGG_B0(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[1, 1, 1, 2.5],
         override_groups_map=None,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld)
     return model
 
 
@@ -296,7 +313,8 @@ def RepVGG_B1(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[2, 2, 2, 4],
         override_groups_map=None,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld)
     return model
 
 
@@ -306,7 +324,8 @@ def RepVGG_B1g2(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[2, 2, 2, 4],
         override_groups_map=g2_map,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld)
     return model
 
 
@@ -316,7 +335,8 @@ def RepVGG_B1g4(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[2, 2, 2, 4],
         override_groups_map=g4_map,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld)
     return model
 
 
@@ -326,7 +346,8 @@ def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[2.5, 2.5, 2.5, 5],
         override_groups_map=None,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld)
     return model
 
 
@@ -336,7 +357,8 @@ def RepVGG_B2g2(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[2.5, 2.5, 2.5, 5],
         override_groups_map=g2_map,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld)
     return model
 
 
@@ -346,7 +368,8 @@ def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[2.5, 2.5, 2.5, 5],
         override_groups_map=g4_map,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld)
     return model
 
 
@@ -356,7 +379,8 @@ def RepVGG_B3(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[3, 3, 3, 5],
         override_groups_map=None,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld)
     return model
 
 
@@ -366,7 +390,8 @@ def RepVGG_B3g2(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[3, 3, 3, 5],
         override_groups_map=g2_map,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld)
     return model
 
 
@@ -376,5 +401,6 @@ def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs):
         width_multiplier=[3, 3, 3, 5],
         override_groups_map=g4_map,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/res2net.py b/ppcls/arch/backbone/model_zoo/res2net.py
index 15a9427c2..191cc849c 100644
--- a/ppcls/arch/backbone/model_zoo/res2net.py
+++ b/ppcls/arch/backbone/model_zoo/res2net.py
@@ -29,9 +29,12 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"Res2Net50_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams",
-              "Res2Net50_14w_8s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "Res2Net50_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams",
+    "Res2Net50_14w_8s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -151,7 +154,7 @@ class BottleneckBlock(nn.Layer):
 
 
 class Res2Net(nn.Layer):
-    def __init__(self, layers=50, scales=4, width=26, class_dim=1000):
+    def __init__(self, layers=50, scales=4, width=26, class_num=1000):
         super(Res2Net, self).__init__()
 
         self.layers = layers
@@ -218,7 +221,7 @@ class Res2Net(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_weights"),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -245,15 +248,17 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def Res2Net50_26w_4s(pretrained=False, use_ssld=False, **kwargs):
     model = Res2Net(layers=50, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld)
     return model
 
 
 def Res2Net50_14w_8s(pretrained=False, use_ssld=False, **kwargs):
     model = Res2Net(layers=50, scales=8, width=14, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld)
-    return model
\ No newline at end of file
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld)
+    return model
diff --git a/ppcls/arch/backbone/model_zoo/res2net_vd.py b/ppcls/arch/backbone/model_zoo/res2net_vd.py
index 28ab03a01..a37567980 100644
--- a/ppcls/arch/backbone/model_zoo/res2net_vd.py
+++ b/ppcls/arch/backbone/model_zoo/res2net_vd.py
@@ -29,10 +29,14 @@ import math
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"Res2Net50_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams",
-              "Res2Net101_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams",
-              "Res2Net200_vd_26w_4s": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "Res2Net50_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams",
+    "Res2Net101_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams",
+    "Res2Net200_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -159,7 +163,7 @@ class BottleneckBlock(nn.Layer):
 
 
 class Res2Net_vd(nn.Layer):
-    def __init__(self, layers=50, scales=4, width=26, class_dim=1000):
+    def __init__(self, layers=50, scales=4, width=26, class_num=1000):
         super(Res2Net_vd, self).__init__()
 
         self.layers = layers
@@ -240,7 +244,7 @@ class Res2Net_vd(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_weights"),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -273,17 +277,29 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def Res2Net50_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
     model = Res2Net_vd(layers=50, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net50_vd_26w_4s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net50_vd_26w_4s"],
+        use_ssld=use_ssld)
     return model
 
 
 def Res2Net101_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
     model = Res2Net_vd(layers=101, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net101_vd_26w_4s"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net101_vd_26w_4s"],
+        use_ssld=use_ssld)
     return model
 
 
 def Res2Net200_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
     model = Res2Net_vd(layers=200, scales=4, width=26, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Res2Net200_vd_26w_4s"], use_ssld=use_ssld)
-    return model
\ No newline at end of file
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net200_vd_26w_4s"],
+        use_ssld=use_ssld)
+    return model
diff --git a/ppcls/arch/backbone/model_zoo/resnest.py b/ppcls/arch/backbone/model_zoo/resnest.py
index 3160095ef..a414c29f5 100644
--- a/ppcls/arch/backbone/model_zoo/resnest.py
+++ b/ppcls/arch/backbone/model_zoo/resnest.py
@@ -29,10 +29,14 @@ from paddle.regularizer import L2Decay
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"ResNeSt50_fast_1s1x64d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
-              "ResNeSt50": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
-              "ResNeSt101": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
-             }
+MODEL_URLS = {
+    "ResNeSt50_fast_1s1x64d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
+    "ResNeSt50":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
+    "ResNeSt101":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -453,7 +457,7 @@ class ResNeSt(nn.Layer):
                  avd_first=False,
                  final_drop=0.0,
                  last_gamma=False,
-                 class_dim=1000):
+                 class_num=1000):
         super(ResNeSt, self).__init__()
 
         self.cardinality = groups
@@ -643,7 +647,7 @@ class ResNeSt(nn.Layer):
 
         self.out = Linear(
             self.out_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=nn.initializer.Uniform(-stdv, stdv),
                 name="fc_weights"),
@@ -663,7 +667,7 @@ class ResNeSt(nn.Layer):
         x = self.out(x)
         return x
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -675,8 +679,8 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
-    
+
+
 def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeSt(
         layers=[3, 4, 6, 3],
@@ -690,7 +694,11 @@ def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs):
         avd_first=True,
         final_drop=0.0,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt50_fast_1s1x64d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeSt50_fast_1s1x64d"],
+        use_ssld=use_ssld)
     return model
 
 
@@ -707,7 +715,8 @@ def ResNeSt50(pretrained=False, use_ssld=False, **kwargs):
         avd_first=False,
         final_drop=0.0,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld)
     return model
 
 
@@ -724,5 +733,6 @@ def ResNeSt101(pretrained=False, use_ssld=False, **kwargs):
         avd_first=False,
         final_drop=0.0,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/resnet_vc.py b/ppcls/arch/backbone/model_zoo/resnet_vc.py
index 53b9f8d5e..6b972dc7b 100644
--- a/ppcls/arch/backbone/model_zoo/resnet_vc.py
+++ b/ppcls/arch/backbone/model_zoo/resnet_vc.py
@@ -30,8 +30,9 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "ResNet50_vc": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams",
-             }
+    "ResNet50_vc":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -177,7 +178,7 @@ class BasicBlock(nn.Layer):
 
 
 class ResNet_vc(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000):
+    def __init__(self, layers=50, class_num=1000):
         super(ResNet_vc, self).__init__()
 
         self.layers = layers
@@ -270,7 +271,7 @@ class ResNet_vc(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
             bias_attr=ParamAttr(name="fc_0.b_0"))
@@ -287,7 +288,7 @@ class ResNet_vc(nn.Layer):
         y = self.out(y)
         return y
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -300,9 +301,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
 
-        
+
 def ResNet50_vc(pretrained=False, use_ssld=False, **kwargs):
     model = ResNet_vc(layers=50, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld)
     return model
-
diff --git a/ppcls/arch/backbone/model_zoo/resnext.py b/ppcls/arch/backbone/model_zoo/resnext.py
index 5104b4cba..1aef81144 100644
--- a/ppcls/arch/backbone/model_zoo/resnext.py
+++ b/ppcls/arch/backbone/model_zoo/resnext.py
@@ -30,13 +30,19 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "ResNeXt50_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams",
-              "ResNeXt50_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams",
-              "ResNeXt101_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams",
-              "ResNeXt101_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams",
-              "ResNeXt152_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams",
-              "ResNeXt152_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams",
-             }
+    "ResNeXt50_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams",
+    "ResNeXt50_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams",
+    "ResNeXt101_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams",
+    "ResNeXt101_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams",
+    "ResNeXt152_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams",
+    "ResNeXt152_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -143,7 +149,12 @@ class BottleneckBlock(nn.Layer):
 
 
 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32, input_image_channel=3, data_format="NCHW"):
+    def __init__(self,
+                 layers=50,
+                 class_num=1000,
+                 cardinality=32,
+                 input_image_channel=3,
+                 data_format="NCHW"):
         super(ResNeXt, self).__init__()
 
         self.layers = layers
@@ -176,7 +187,8 @@ class ResNeXt(nn.Layer):
             act='relu',
             name="res_conv1",
             data_format=self.data_format)
-        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1, data_format=self.data_format)
+        self.pool2d_max = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=self.data_format)
 
         self.block_list = []
         for block in range(len(depth)):
@@ -211,7 +223,7 @@ class ResNeXt(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_weights"),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -230,7 +242,7 @@ class ResNeXt(nn.Layer):
             y = self.out(y)
             return y
 
-        
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -246,35 +258,41 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld)
     return model
 
 
 def ResNeXt50_64x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=50, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld)
     return model
 
 
 def ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld)
     return model
 
 
 def ResNeXt101_64x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=101, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld)
     return model
 
 
 def ResNeXt152_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=152, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld)
     return model
 
 
 def ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
index e530a9a2b..2b3c88b55 100644
--- a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
+++ b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
@@ -9,17 +9,19 @@ from paddle.nn.initializer import Uniform
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "ResNeXt101_32x8d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams",
-              "ResNeXt101_32x16d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x816_wsl_pretrained.pdparams",
-              "ResNeXt101_32x32d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams",
-              "ResNeXt101_32x48d_wsl": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams",
-
-             }
+    "ResNeXt101_32x8d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams",
+    "ResNeXt101_32x16d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x816_wsl_pretrained.pdparams",
+    "ResNeXt101_32x32d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams",
+    "ResNeXt101_32x48d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
 
-
 class ConvBNLayer(nn.Layer):
     def __init__(self,
                  input_channels,
@@ -128,10 +130,10 @@ class BottleneckBlock(nn.Layer):
 
 
 class ResNeXt101WSL(nn.Layer):
-    def __init__(self, layers=101, cardinality=32, width=48, class_dim=1000):
+    def __init__(self, layers=101, cardinality=32, width=48, class_num=1000):
         super(ResNeXt101WSL, self).__init__()
 
-        self.class_dim = class_dim
+        self.class_num = class_num
 
         self.layers = layers
         self.cardinality = cardinality
@@ -384,7 +386,7 @@ class ResNeXt101WSL(nn.Layer):
         self._avg_pool = AdaptiveAvgPool2D(1)
         self._out = Linear(
             num_filters[3] // (width // 8),
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(name="fc.weight"),
             bias_attr=ParamAttr(name="fc.bias"))
 
@@ -434,7 +436,7 @@ class ResNeXt101WSL(nn.Layer):
         x = self._out(x)
         return x
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -450,23 +452,39 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def ResNeXt101_32x8d_wsl(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt101WSL(cardinality=32, width=8, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x8d_wsl"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x8d_wsl"],
+        use_ssld=use_ssld)
     return model
 
 
 def ResNeXt101_32x16d_wsl(**args):
     model = ResNeXt101WSL(cardinality=32, width=16, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x16d_ws"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x16d_ws"],
+        use_ssld=use_ssld)
     return model
 
 
 def ResNeXt101_32x32d_wsl(**args):
     model = ResNeXt101WSL(cardinality=32, width=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x32d_wsl"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x32d_wsl"],
+        use_ssld=use_ssld)
     return model
 
 
 def ResNeXt101_32x48d_wsl(**args):
     model = ResNeXt101WSL(cardinality=32, width=48, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_32x48d_wsl"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x48d_wsl"],
+        use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/resnext_vd.py b/ppcls/arch/backbone/model_zoo/resnext_vd.py
index b14e265e9..b2bd484f3 100644
--- a/ppcls/arch/backbone/model_zoo/resnext_vd.py
+++ b/ppcls/arch/backbone/model_zoo/resnext_vd.py
@@ -30,16 +30,23 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams",
-              "ResNeXt50_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams",
-              "ResNeXt101_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams",
-              "ResNeXt101_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams",
-              "ResNeXt152_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams",
-              "ResNeXt152_vd_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams",
-             }
+    "ResNeXt50_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams",
+    "ResNeXt50_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams",
+    "ResNeXt101_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams",
+    "ResNeXt101_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams",
+    "ResNeXt152_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams",
+    "ResNeXt152_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
+
 class ConvBNLayer(nn.Layer):
     def __init__(
             self,
@@ -145,7 +152,7 @@ class BottleneckBlock(nn.Layer):
 
 
 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32):
+    def __init__(self, layers=50, class_num=1000, cardinality=32):
         super(ResNeXt, self).__init__()
 
         self.layers = layers
@@ -225,7 +232,7 @@ class ResNeXt(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc_weights"),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -242,6 +249,7 @@ class ResNeXt(nn.Layer):
         y = self.out(y)
         return y
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -257,35 +265,53 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
     return model
 
 
 def ResNeXt50_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=50, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld)
     return model
 
 
 def ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_vd_32x4d"],
+        use_ssld=use_ssld)
     return model
 
 
 def ResNeXt101_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=101, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt101_vd_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_vd_64x4d"],
+        use_ssld=use_ssld)
     return model
 
 
 def ResNeXt152_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=152, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt152_vd_32x4d"],
+        use_ssld=use_ssld)
     return model
 
 
 def ResNeXt152_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ResNeXt152_vd_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt152_vd_64x4d"],
+        use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/rexnet.py b/ppcls/arch/backbone/model_zoo/rexnet.py
index 799826c94..039f6c538 100644
--- a/ppcls/arch/backbone/model_zoo/rexnet.py
+++ b/ppcls/arch/backbone/model_zoo/rexnet.py
@@ -25,12 +25,17 @@ from math import ceil
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "ReXNet_1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams",
-              "ReXNet_1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams",
-              "ReXNet_1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_32x4d_pretrained.pdparams",
-              "ReXNet_2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams",
-              "ReXNet_3_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams",
-             }
+    "ReXNet_1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams",
+    "ReXNet_1_3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams",
+    "ReXNet_1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_32x4d_pretrained.pdparams",
+    "ReXNet_2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams",
+    "ReXNet_3_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -154,7 +159,7 @@ class ReXNetV1(nn.Layer):
                  final_ch=180,
                  width_mult=1.0,
                  depth_mult=1.0,
-                 class_dim=1000,
+                 class_num=1000,
                  use_se=True,
                  se_ratio=12,
                  dropout_ratio=0.2,
@@ -220,7 +225,7 @@ class ReXNetV1(nn.Layer):
         self.output = nn.Sequential(
             nn.Dropout(dropout_ratio),
             nn.Conv2D(
-                pen_channels, class_dim, 1, bias_attr=True))
+                pen_channels, class_num, 1, bias_attr=True))
 
     def forward(self, x):
         x = self.features(x)
@@ -239,33 +244,38 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-    
-    
+
+
 def ReXNet_1_0(pretrained=False, use_ssld=False, **kwargs):
     model = ReXNetV1(width_mult=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld)
     return model
 
 
 def ReXNet_1_3(pretrained=False, use_ssld=False, **kwargs):
     model = ReXNetV1(width_mult=1.3, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld)
     return model
 
 
 def ReXNet_1_5(pretrained=False, use_ssld=False, **kwargs):
     model = ReXNetV1(width_mult=1.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld)
     return model
 
 
 def ReXNet_2_0(pretrained=False, use_ssld=False, **kwargs):
     model = ReXNetV1(width_mult=2.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld)
     return model
 
 
 def ReXNet_3_0(pretrained=False, use_ssld=False, **kwargs):
     model = ReXNetV1(width_mult=3.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld)
-    return model
\ No newline at end of file
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld)
+    return model
diff --git a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
index cc48f8d36..205feec54 100644
--- a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
@@ -29,11 +29,13 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "SE_ResNet18_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams",
-              "SE_ResNet34_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams",
-              "SE_ResNet50_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams",
-
-             }
+    "SE_ResNet18_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams",
+    "SE_ResNet34_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams",
+    "SE_ResNet50_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -240,7 +242,7 @@ class SELayer(nn.Layer):
 
 
 class SE_ResNet_vd(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000):
+    def __init__(self, layers=50, class_num=1000):
         super(SE_ResNet_vd, self).__init__()
 
         self.layers = layers
@@ -336,7 +338,7 @@ class SE_ResNet_vd(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc6_weights"),
             bias_attr=ParamAttr(name="fc6_offset"))
@@ -353,7 +355,7 @@ class SE_ResNet_vd(nn.Layer):
         y = self.out(y)
         return y
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -365,21 +367,24 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-    
+
 
 def SE_ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
     model = SE_ResNet_vd(layers=18, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld)
     return model
 
 
 def SE_ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
     model = SE_ResNet_vd(layers=34, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld)
     return model
 
 
 def SE_ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
     model = SE_ResNet_vd(layers=50, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/se_resnext.py b/ppcls/arch/backbone/model_zoo/se_resnext.py
index d873d8111..8b7149e26 100644
--- a/ppcls/arch/backbone/model_zoo/se_resnext.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnext.py
@@ -30,11 +30,13 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "SE_ResNeXt50_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams",
-              "SE_ResNeXt101_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams",
-              "SE_ResNeXt152_64x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams",
-
-             }
+    "SE_ResNeXt50_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams",
+    "SE_ResNeXt101_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams",
+    "SE_ResNeXt152_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -148,7 +150,12 @@ class BottleneckBlock(nn.Layer):
 
 
 class SELayer(nn.Layer):
-    def __init__(self, num_channels, num_filters, reduction_ratio, name=None, data_format="NCHW"):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 reduction_ratio,
+                 name=None,
+                 data_format="NCHW"):
         super(SELayer, self).__init__()
 
         self.data_format = data_format
@@ -193,7 +200,12 @@ class SELayer(nn.Layer):
 
 
 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32, input_image_channel=3, data_format="NCHW"):
+    def __init__(self,
+                 layers=50,
+                 class_num=1000,
+                 cardinality=32,
+                 input_image_channel=3,
+                 data_format="NCHW"):
         super(ResNeXt, self).__init__()
 
         self.layers = layers
@@ -254,7 +266,8 @@ class ResNeXt(nn.Layer):
                 name="conv3",
                 data_format=self.data_format)
 
-        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1, data_format=self.data_format)
+        self.pool2d_max = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=self.data_format)
 
         self.block_list = []
         n = 1 if layers == 50 or layers == 101 else 3
@@ -286,13 +299,13 @@ class ResNeXt(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc6_weights"),
             bias_attr=ParamAttr(name="fc6_offset"))
 
     def forward(self, inputs):
-         with paddle.static.amp.fp16_guard():
+        with paddle.static.amp.fp16_guard():
             if self.data_format == "NHWC":
                 inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
                 inputs.stop_gradient = True
@@ -310,7 +323,7 @@ class ResNeXt(nn.Layer):
             y = self.out(y)
             return y
 
-        
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -322,21 +335,30 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def SE_ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld)
     return model
 
 
 def SE_ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt101_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt101_32x4d"],
+        use_ssld=use_ssld)
     return model
 
 
 def SE_ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt152_64x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt152_64x4d"],
+        use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
index 5e840f83d..b23b0d2d5 100644
--- a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
@@ -30,11 +30,13 @@ import math
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "SE_ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
-              "SE_ResNeXt50_vd_32x4d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
-              "SENet154_vd": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams",
-
-             }
+    "SE_ResNeXt50_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
+    "SE_ResNeXt101_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_vd_32x4d_pretrained.pdparams",
+    "SENet154_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -184,7 +186,7 @@ class SELayer(nn.Layer):
 
 
 class ResNeXt(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000, cardinality=32):
+    def __init__(self, layers=50, class_num=1000, cardinality=32):
         super(ResNeXt, self).__init__()
 
         self.layers = layers
@@ -261,7 +263,7 @@ class ResNeXt(nn.Layer):
 
         self.out = Linear(
             self.pool2d_avg_channels,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 initializer=Uniform(-stdv, stdv), name="fc6_weights"),
             bias_attr=ParamAttr(name="fc6_offset"))
@@ -278,7 +280,7 @@ class ResNeXt(nn.Layer):
         y = self.out(y)
         return y
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -290,21 +292,30 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def SE_ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=50, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt50_vd_32x4d"],
+        use_ssld=use_ssld)
     return model
 
 
 def SE_ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=101, cardinality=32, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SE_ResNeXt101_vd_32x4d"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt101_vd_32x4d"],
+        use_ssld=use_ssld)
     return model
 
 
 def SENet154_vd(pretrained=False, use_ssld=False, **kwargs):
     model = ResNeXt(layers=152, cardinality=64, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
index 29abad66e..d8bb69ffe 100644
--- a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
+++ b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
@@ -25,14 +25,21 @@ from paddle.nn.functional import swish
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "ShuffleNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams",
-              "ShuffleNetV2_x0_33": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams",
-              "ShuffleNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams",
-              "ShuffleNetV2_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams",
-              "ShuffleNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams",
-              "ShuffleNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams",
-              "ShuffleNetV2_swish": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams"
-             }
+    "ShuffleNetV2_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams",
+    "ShuffleNetV2_x0_33":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams",
+    "ShuffleNetV2_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams",
+    "ShuffleNetV2_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams",
+    "ShuffleNetV2_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams",
+    "ShuffleNetV2_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams",
+    "ShuffleNetV2_swish":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -207,10 +214,10 @@ class InvertedResidualDS(Layer):
 
 
 class ShuffleNet(Layer):
-    def __init__(self, class_dim=1000, scale=1.0, act="relu"):
+    def __init__(self, class_num=1000, scale=1.0, act="relu"):
         super(ShuffleNet, self).__init__()
         self.scale = scale
-        self.class_dim = class_dim
+        self.class_num = class_num
         stage_repeats = [4, 8, 4]
 
         if scale == 0.25:
@@ -277,7 +284,7 @@ class ShuffleNet(Layer):
         # 5. fc
         self._fc = Linear(
             stage_out_channels[-1],
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(name='fc6_weights'),
             bias_attr=ParamAttr(name='fc6_offset'))
 
@@ -308,41 +315,48 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def ShuffleNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
     model = ShuffleNet(scale=0.25, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld)
     return model
 
 
 def ShuffleNetV2_x0_33(pretrained=False, use_ssld=False, **kwargs):
     model = ShuffleNet(scale=0.33, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld)
     return model
 
 
 def ShuffleNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
     model = ShuffleNet(scale=0.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld)
     return model
 
 
 def ShuffleNetV2_x1_0(pretrained=False, use_ssld=False, **kwargs):
     model = ShuffleNet(scale=1.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld)
     return model
 
 
 def ShuffleNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
     model = ShuffleNet(scale=1.5, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld)
     return model
 
 
 def ShuffleNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
     model = ShuffleNet(scale=2.0, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld)
     return model
 
 
 def ShuffleNetV2_swish(pretrained=False, use_ssld=False, **kwargs):
     model = ShuffleNet(scale=1.0, act="swish", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/squeezenet.py b/ppcls/arch/backbone/model_zoo/squeezenet.py
index a88a1bcff..647cd2ea7 100644
--- a/ppcls/arch/backbone/model_zoo/squeezenet.py
+++ b/ppcls/arch/backbone/model_zoo/squeezenet.py
@@ -22,9 +22,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "SqueezeNet1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams",
-              "SqueezeNet1_1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams",
-             }
+    "SqueezeNet1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams",
+    "SqueezeNet1_1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -78,7 +80,7 @@ class MakeFire(nn.Layer):
 
 
 class SqueezeNet(nn.Layer):
-    def __init__(self, version, class_dim=1000):
+    def __init__(self, version, class_num=1000):
         super(SqueezeNet, self).__init__()
         self.version = version
 
@@ -125,7 +127,7 @@ class SqueezeNet(nn.Layer):
         self._drop = Dropout(p=0.5, mode="downscale_in_infer")
         self._conv9 = Conv2D(
             512,
-            class_dim,
+            class_num,
             1,
             weight_attr=ParamAttr(name="conv10_weights"),
             bias_attr=ParamAttr(name="conv10_offset"))
@@ -164,6 +166,7 @@ class SqueezeNet(nn.Layer):
         x = paddle.squeeze(x, axis=[2, 3])
         return x
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -175,15 +178,17 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def SqueezeNet1_0(pretrained=False, use_ssld=False, **kwargs):
     model = SqueezeNet(version="1.0", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld)
     return model
 
 
 def SqueezeNet1_1(pretrained=False, use_ssld=False, **kwargs):
     model = SqueezeNet(version="1.1", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/swin_transformer.py b/ppcls/arch/backbone/model_zoo/swin_transformer.py
index a33bf5888..f4348fbae 100644
--- a/ppcls/arch/backbone/model_zoo/swin_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/swin_transformer.py
@@ -24,13 +24,19 @@ from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPat
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "SwinTransformer_tiny_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_small_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_base_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_base_patch4_window12_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams",
-              "SwinTransformer_large_patch4_window7_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_pretrained.pdparams",
-              "SwinTransformer_large_patch4_window12_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_pretrained.pdparams",
-             }
+    "SwinTransformer_tiny_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_small_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_base_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_base_patch4_window12_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams",
+    "SwinTransformer_large_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_large_patch4_window12_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_pretrained.pdparams",
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -611,7 +617,7 @@ class SwinTransformer(nn.Layer):
                  img_size=224,
                  patch_size=4,
                  in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                  embed_dim=96,
                  depths=[2, 2, 6, 2],
                  num_heads=[3, 6, 12, 24],
@@ -629,7 +635,7 @@ class SwinTransformer(nn.Layer):
                  **kwargs):
         super(SwinTransformer, self).__init__()
 
-        self.num_classes = num_classes = class_dim
+        self.num_classes = num_classes = class_num
         self.num_layers = len(depths)
         self.embed_dim = embed_dim
         self.ape = ape
@@ -729,7 +735,7 @@ class SwinTransformer(nn.Layer):
         flops += self.num_features * self.num_classes
         return flops
 
-    
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -743,7 +749,9 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         )
 
 
-def SwinTransformer_tiny_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_tiny_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
     model = SwinTransformer(
         embed_dim=96,
         depths=[2, 2, 6, 2],
@@ -751,22 +759,34 @@ def SwinTransformer_tiny_patch4_window7_224(pretrained=False, use_ssld=False, **
         window_size=7,
         drop_path_rate=0.2,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"],
+        use_ssld=use_ssld)
     return model
 
 
-def SwinTransformer_small_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_small_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
     model = SwinTransformer(
         embed_dim=96,
         depths=[2, 2, 18, 2],
         num_heads=[3, 6, 12, 24],
         window_size=7,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_small_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_small_patch4_window7_224"],
+        use_ssld=use_ssld)
     return model
 
 
-def SwinTransformer_base_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_base_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
     model = SwinTransformer(
         embed_dim=128,
         depths=[2, 2, 18, 2],
@@ -774,11 +794,17 @@ def SwinTransformer_base_patch4_window7_224(pretrained=False, use_ssld=False, **
         window_size=7,
         drop_path_rate=0.5,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_base_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window7_224"],
+        use_ssld=use_ssld)
     return model
 
 
-def SwinTransformer_base_patch4_window12_384(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_base_patch4_window12_384(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
     model = SwinTransformer(
         img_size=384,
         embed_dim=128,
@@ -787,22 +813,34 @@ def SwinTransformer_base_patch4_window12_384(pretrained=False, use_ssld=False, *
         window_size=12,
         drop_path_rate=0.5,  # NOTE: do not appear in offical code
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_base_patch4_window12_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window12_384"],
+        use_ssld=use_ssld)
     return model
 
 
-def SwinTransformer_large_patch4_window7_224(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_large_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
     model = SwinTransformer(
         embed_dim=192,
         depths=[2, 2, 18, 2],
         num_heads=[6, 12, 24, 48],
         window_size=7,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_large_patch4_window7_224"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window7_224"],
+        use_ssld=use_ssld)
     return model
 
 
-def SwinTransformer_large_patch4_window12_384(pretrained=False, use_ssld=False, **kwargs):
+def SwinTransformer_large_patch4_window12_384(pretrained=False,
+                                              use_ssld=False,
+                                              **kwargs):
     model = SwinTransformer(
         img_size=384,
         embed_dim=192,
@@ -810,5 +848,9 @@ def SwinTransformer_large_patch4_window12_384(pretrained=False, use_ssld=False,
         num_heads=[6, 12, 24, 48],
         window_size=12,
         **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["SwinTransformer_large_patch4_window12_384"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window12_384"],
+        use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/tnt.py b/ppcls/arch/backbone/model_zoo/tnt.py
index 61f1083e4..9990584dc 100644
--- a/ppcls/arch/backbone/model_zoo/tnt.py
+++ b/ppcls/arch/backbone/model_zoo/tnt.py
@@ -23,16 +23,13 @@ from paddle.nn.initializer import TruncatedNormal, Constant
 from ppcls.arch.backbone.base.theseus_layer import Identity
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-
 MODEL_URLS = {
     "TNT_small":
     "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams"
 }
 
-
 __all__ = MODEL_URLS.keys()
 
-
 trunc_normal_ = TruncatedNormal(std=.02)
 zeros_ = Constant(value=0.)
 ones_ = Constant(value=1.)
@@ -66,8 +63,12 @@ class DropPath(nn.Layer):
 
 
 class Mlp(nn.Layer):
-    def __init__(self, in_features, hidden_features=None,
-                 out_features=None, act_layer=nn.GELU, drop=0.):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.GELU,
+                 drop=0.):
         super().__init__()
         out_features = out_features or in_features
         hidden_features = hidden_features or in_features
@@ -86,14 +87,19 @@ class Mlp(nn.Layer):
 
 
 class Attention(nn.Layer):
-    def __init__(self, dim, hidden_dim, num_heads=8, 
-                 qkv_bias=False, attn_drop=0., proj_drop=0.):
+    def __init__(self,
+                 dim,
+                 hidden_dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 attn_drop=0.,
+                 proj_drop=0.):
         super().__init__()
         self.hidden_dim = hidden_dim
         self.num_heads = num_heads
         head_dim = hidden_dim // num_heads
         self.head_dim = head_dim
-        self.scale = head_dim ** -0.5
+        self.scale = head_dim**-0.5
 
         self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
         self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
@@ -103,73 +109,103 @@ class Attention(nn.Layer):
 
     def forward(self, x):
         B, N, C = x.shape
-        qk = self.qk(x).reshape((B, N, 2, self.num_heads, self.head_dim)).transpose((2, 0, 3, 1, 4))
+        qk = self.qk(x).reshape(
+            (B, N, 2, self.num_heads, self.head_dim)).transpose(
+                (2, 0, 3, 1, 4))
 
         q, k = qk[0], qk[1]
-        v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose((0, 2, 1, 3))
+        v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose(
+            (0, 2, 1, 3))
 
-        attn = (q @ k.transpose((0, 1, 3, 2))) * self.scale
+        attn = (q @k.transpose((0, 1, 3, 2))) * self.scale
         attn = nn.functional.softmax(attn, axis=-1)
         attn = self.attn_drop(attn)
 
-        x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
+        x = (attn @v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
         x = self.proj(x)
         x = self.proj_drop(x)
         return x
 
 
 class Block(nn.Layer):
-    def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4, mlp_ratio=4.,
-                 qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+    def __init__(self,
+                 dim,
+                 in_dim,
+                 num_pixel,
+                 num_heads=12,
+                 in_num_head=4,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
                  norm_layer=nn.LayerNorm):
         super().__init__()
         # Inner transformer
         self.norm_in = norm_layer(in_dim)
         self.attn_in = Attention(
-            in_dim, in_dim, num_heads=in_num_head, 
-            qkv_bias=qkv_bias, attn_drop=attn_drop, 
-            proj_drop=drop
-        )
+            in_dim,
+            in_dim,
+            num_heads=in_num_head,
+            qkv_bias=qkv_bias,
+            attn_drop=attn_drop,
+            proj_drop=drop)
 
         self.norm_mlp_in = norm_layer(in_dim)
-        self.mlp_in = Mlp(
-            in_features=in_dim, hidden_features=int(in_dim * 4),
-            out_features=in_dim, act_layer=act_layer, drop=drop
-        )
+        self.mlp_in = Mlp(in_features=in_dim,
+                          hidden_features=int(in_dim * 4),
+                          out_features=in_dim,
+                          act_layer=act_layer,
+                          drop=drop)
 
         self.norm1_proj = norm_layer(in_dim)
         self.proj = nn.Linear(in_dim * num_pixel, dim)
         # Outer transformer
         self.norm_out = norm_layer(dim)
         self.attn_out = Attention(
-            dim, dim, num_heads=num_heads, qkv_bias=qkv_bias,
-            attn_drop=attn_drop, proj_drop=drop
-        )
+            dim,
+            dim,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            attn_drop=attn_drop,
+            proj_drop=drop)
 
         self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
 
         self.norm_mlp = norm_layer(dim)
-        self.mlp = Mlp(
-            in_features=dim, hidden_features=int(dim * mlp_ratio),
-            out_features=dim, act_layer=act_layer, drop=drop
-        )
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=int(dim * mlp_ratio),
+                       out_features=dim,
+                       act_layer=act_layer,
+                       drop=drop)
 
     def forward(self, pixel_embed, patch_embed):
         # inner
-        pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))
-        pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))
+        pixel_embed = pixel_embed + self.drop_path(
+            self.attn_in(self.norm_in(pixel_embed)))
+        pixel_embed = pixel_embed + self.drop_path(
+            self.mlp_in(self.norm_mlp_in(pixel_embed)))
         # outer
         B, N, C = patch_embed.shape
-        patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
-        patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed)))
-        patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed)))
+        patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(
+            self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
+        patch_embed = patch_embed + self.drop_path(
+            self.attn_out(self.norm_out(patch_embed)))
+        patch_embed = patch_embed + self.drop_path(
+            self.mlp(self.norm_mlp(patch_embed)))
         return pixel_embed, patch_embed
 
 
 class PixelEmbed(nn.Layer):
-    def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 in_dim=48,
+                 stride=4):
         super().__init__()
-        num_patches = (img_size // patch_size) ** 2
+        num_patches = (img_size // patch_size)**2
         self.img_size = img_size
         self.num_patches = num_patches
         self.in_dim = in_dim
@@ -177,10 +213,7 @@ class PixelEmbed(nn.Layer):
         self.new_patch_size = new_patch_size
 
         self.proj = nn.Conv2D(
-            in_chans, self.in_dim,
-            kernel_size=7, padding=3, 
-            stride=stride
-        )
+            in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride)
 
     def forward(self, x, pixel_pos):
         B, C, H, W = x.shape
@@ -188,50 +221,64 @@ class PixelEmbed(nn.Layer):
 
         x = self.proj(x)
         x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
-        x = x.transpose((0, 2, 1)).reshape((B * self.num_patches, self.in_dim, self.new_patch_size, self.new_patch_size))
+        x = x.transpose((0, 2, 1)).reshape(
+            (B * self.num_patches, self.in_dim, self.new_patch_size,
+             self.new_patch_size))
         x = x + pixel_pos
-        x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose((0, 2, 1))
+        x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose(
+            (0, 2, 1))
         return x
 
 
 class TNT(nn.Layer):
-    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, in_dim=48, depth=12,
-                 num_heads=12, in_num_head=4, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0.,
-                 drop_path_rate=0., norm_layer=nn.LayerNorm, first_stride=4, class_dim=1000):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 embed_dim=768,
+                 in_dim=48,
+                 depth=12,
+                 num_heads=12,
+                 in_num_head=4,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 first_stride=4,
+                 class_num=1000):
         super().__init__()
-        self.class_dim = class_dim
+        self.class_num = class_num
         # num_features for consistency with other models
         self.num_features = self.embed_dim = embed_dim
 
         self.pixel_embed = PixelEmbed(
-            img_size=img_size, patch_size=patch_size, 
-            in_chans=in_chans, in_dim=in_dim, stride=first_stride
-        )
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=in_chans,
+            in_dim=in_dim,
+            stride=first_stride)
         num_patches = self.pixel_embed.num_patches
         self.num_patches = num_patches
         new_patch_size = self.pixel_embed.new_patch_size
-        num_pixel = new_patch_size ** 2
+        num_pixel = new_patch_size**2
 
         self.norm1_proj = norm_layer(num_pixel * in_dim)
         self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
         self.norm2_proj = norm_layer(embed_dim)
 
         self.cls_token = self.create_parameter(
-            shape=(1, 1, embed_dim), 
-            default_initializer=zeros_
-        )
+            shape=(1, 1, embed_dim), default_initializer=zeros_)
         self.add_parameter("cls_token", self.cls_token)
 
         self.patch_pos = self.create_parameter(
-            shape=(1, num_patches + 1, embed_dim), 
-            default_initializer=zeros_
-        )
+            shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_)
         self.add_parameter("patch_pos", self.patch_pos)
 
         self.pixel_pos = self.create_parameter(
-            shape=(1, in_dim, new_patch_size, new_patch_size), 
-            default_initializer=zeros_
-        )
+            shape=(1, in_dim, new_patch_size, new_patch_size),
+            default_initializer=zeros_)
         self.add_parameter("pixel_pos", self.pixel_pos)
 
         self.pos_drop = nn.Dropout(p=drop_rate)
@@ -241,17 +288,24 @@ class TNT(nn.Layer):
 
         blocks = []
         for i in range(depth):
-            blocks.append(Block(
-                dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel, num_heads=num_heads, 
-                in_num_head=in_num_head, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, 
-                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], 
-                norm_layer=norm_layer
-            ))
+            blocks.append(
+                Block(
+                    dim=embed_dim,
+                    in_dim=in_dim,
+                    num_pixel=num_pixel,
+                    num_heads=num_heads,
+                    in_num_head=in_num_head,
+                    mlp_ratio=mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    drop=drop_rate,
+                    attn_drop=attn_drop_rate,
+                    drop_path=dpr[i],
+                    norm_layer=norm_layer))
         self.blocks = nn.LayerList(blocks)
         self.norm = norm_layer(embed_dim)
 
-        if class_dim > 0:
-            self.head = nn.Linear(embed_dim, class_dim)
+        if class_num > 0:
+            self.head = nn.Linear(embed_dim, class_num)
 
         trunc_normal_(self.cls_token)
         trunc_normal_(self.patch_pos)
@@ -271,8 +325,12 @@ class TNT(nn.Layer):
         B = x.shape[0]
         pixel_embed = self.pixel_embed(x, self.pixel_pos)
 
-        patch_embed = self.norm2_proj(self.proj(self.norm1_proj(pixel_embed.reshape((B, self.num_patches, -1)))))
-        patch_embed = paddle.concat((self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
+        patch_embed = self.norm2_proj(
+            self.proj(
+                self.norm1_proj(
+                    pixel_embed.reshape((B, self.num_patches, -1)))))
+        patch_embed = paddle.concat(
+            (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
         patch_embed = patch_embed + self.patch_pos
         patch_embed = self.pos_drop(patch_embed)
 
@@ -285,7 +343,7 @@ class TNT(nn.Layer):
     def forward(self, x):
         x = self.forward_features(x)
 
-        if self.class_dim > 0:
+        if self.class_num > 0:
             x = self.head(x)
         return x
 
@@ -304,15 +362,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 
 def TNT_small(pretrained=False, **kwargs):
-    model = TNT(
-        patch_size=16,
-        embed_dim=384,
-        in_dim=24,
-        depth=12,
-        num_heads=6,
-        in_num_head=4,
-        qkv_bias=False,
-        **kwargs
-    )
+    model = TNT(patch_size=16,
+                embed_dim=384,
+                in_dim=24,
+                depth=12,
+                num_heads=6,
+                in_num_head=4,
+                qkv_bias=False,
+                **kwargs)
     _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"])
     return model
diff --git a/ppcls/arch/backbone/model_zoo/vision_transformer.py b/ppcls/arch/backbone/model_zoo/vision_transformer.py
index ebe77684e..75d767be4 100644
--- a/ppcls/arch/backbone/model_zoo/vision_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/vision_transformer.py
@@ -231,7 +231,7 @@ class VisionTransformer(nn.Layer):
                  img_size=224,
                  patch_size=16,
                  in_chans=3,
-                 class_dim=1000,
+                 class_num=1000,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
@@ -245,7 +245,7 @@ class VisionTransformer(nn.Layer):
                  epsilon=1e-5,
                  **args):
         super().__init__()
-        self.class_dim = class_dim
+        self.class_num = class_num
 
         self.num_features = self.embed_dim = embed_dim
 
@@ -284,7 +284,7 @@ class VisionTransformer(nn.Layer):
 
         # Classifier head
         self.head = nn.Linear(embed_dim,
-                              class_dim) if class_dim > 0 else Identity()
+                              class_num) if class_num > 0 else Identity()
 
         trunc_normal_(self.pos_embed)
         trunc_normal_(self.cls_token)
diff --git a/ppcls/arch/backbone/model_zoo/xception.py b/ppcls/arch/backbone/model_zoo/xception.py
index 126c3dfdb..2b843788b 100644
--- a/ppcls/arch/backbone/model_zoo/xception.py
+++ b/ppcls/arch/backbone/model_zoo/xception.py
@@ -8,14 +8,16 @@ from paddle.nn.initializer import Uniform
 import math
 import sys
 
-
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-              "Xception41": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams",
-              "Xception65": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams",
-              "Xception71": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams"
-             }
+    "Xception41":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams",
+    "Xception65":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams",
+    "Xception71":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -290,7 +292,7 @@ class ExitFlowBottleneckBlock(nn.Layer):
 
 
 class ExitFlow(nn.Layer):
-    def __init__(self, class_dim):
+    def __init__(self, class_num):
         super(ExitFlow, self).__init__()
 
         name = "exit_flow"
@@ -303,7 +305,7 @@ class ExitFlow(nn.Layer):
         stdv = 1.0 / math.sqrt(2048 * 1.0)
         self._out = Linear(
             2048,
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(
                 name="fc_weights", initializer=Uniform(-stdv, stdv)),
             bias_attr=ParamAttr(name="fc_offset"))
@@ -324,13 +326,13 @@ class Xception(nn.Layer):
     def __init__(self,
                  entry_flow_block_num=3,
                  middle_flow_block_num=8,
-                 class_dim=1000):
+                 class_num=1000):
         super(Xception, self).__init__()
         self.entry_flow_block_num = entry_flow_block_num
         self.middle_flow_block_num = middle_flow_block_num
         self._entry_flow = EntryFlow(entry_flow_block_num)
         self._middle_flow = MiddleFlow(middle_flow_block_num)
-        self._exit_flow = ExitFlow(class_dim)
+        self._exit_flow = ExitFlow(class_num)
 
     def forward(self, inputs):
         x = self._entry_flow(inputs)
@@ -338,6 +340,7 @@ class Xception(nn.Layer):
         x = self._exit_flow(x)
         return x
 
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -349,21 +352,26 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         raise RuntimeError(
             "pretrained type is not available. Please use `string` or `boolean` type."
         )
-        
+
 
 def Xception41(pretrained=False, use_ssld=False, **kwargs):
     model = Xception(entry_flow_block_num=3, middle_flow_block_num=8, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld)
     return model
 
 
 def Xception65(pretrained=False, use_ssld=False, **kwargs):
-    model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld)
+    model = Xception(
+        entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld)
     return model
 
 
 def Xception71(pretrained=False, use_ssld=False, **kwargs):
-    model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld)
+    model = Xception(
+        entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld)
     return model
diff --git a/ppcls/arch/backbone/model_zoo/xception_deeplab.py b/ppcls/arch/backbone/model_zoo/xception_deeplab.py
index dc8c234ec..c52769b37 100644
--- a/ppcls/arch/backbone/model_zoo/xception_deeplab.py
+++ b/ppcls/arch/backbone/model_zoo/xception_deeplab.py
@@ -21,8 +21,12 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
 
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
-MODEL_URLS = {"Xception41_deeplab": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams",
-             "Xception65_deeplab": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams"}
+MODEL_URLS = {
+    "Xception41_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams",
+    "Xception65_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams"
+}
 
 __all__ = list(MODEL_URLS.keys())
 
@@ -268,7 +272,7 @@ class Xception_Block(nn.Layer):
 
 
 class XceptionDeeplab(nn.Layer):
-    def __init__(self, backbone, class_dim=1000):
+    def __init__(self, backbone, class_num=1000):
         super(XceptionDeeplab, self).__init__()
 
         bottleneck_params = gen_bottleneck_params(backbone)
@@ -370,7 +374,7 @@ class XceptionDeeplab(nn.Layer):
         self._pool = AdaptiveAvgPool2D(1)
         self._fc = Linear(
             self.chns[1][-1],
-            class_dim,
+            class_num,
             weight_attr=ParamAttr(name="fc_weights"),
             bias_attr=ParamAttr(name="fc_bias"))
 
@@ -388,8 +392,8 @@ class XceptionDeeplab(nn.Layer):
         x = paddle.squeeze(x, axis=[2, 3])
         x = self._fc(x)
         return x
-    
-    
+
+
 def _load_pretrained(pretrained, model, model_url, use_ssld=False):
     if pretrained is False:
         pass
@@ -405,11 +409,13 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
 
 def Xception41_deeplab(pretrained=False, use_ssld=False, **kwargs):
     model = XceptionDeeplab('xception_41', **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld)
     return model
 
 
 def Xception65_deeplab(pretrained=False, use_ssld=False, **kwargs):
     model = XceptionDeeplab("xception_65", **kwargs)
-    _load_pretrained(pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld)
     return model