diff --git a/ppcls/arch/backbone/variant_models/resnet_variant.py b/ppcls/arch/backbone/variant_models/resnet_variant.py
index eedd00846..19a8e08ab 100644
--- a/ppcls/arch/backbone/variant_models/resnet_variant.py
+++ b/ppcls/arch/backbone/variant_models/resnet_variant.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from collections import defaultdict
 import copy
 import paddle
@@ -137,9 +151,9 @@ def ResNet50_metabin(pretrained=False,
 
     def setup_opt(self, opt):
         """
-        enable_inside_update: enable inside updating for `gate` in MetaBIN
-        lr_gate: learning rate of `gate` during meta-train phase
-        bn_mode: control the running stats & updating of BN
+        Args:
+            opt (dict): Settings that control the behavior of MetaBIN during training.
+                It contains three keys: `enable_inside_update`, `lr_gate` and `bn_mode`.
         """
         self.check_opt(opt)
         self.opt = copy.deepcopy(opt)
diff --git a/ppcls/configs/reid/MetaBIN_ResNet50_single_source.yaml b/ppcls/configs/reid/MetaBIN_ResNet50_single_source.yaml
index 29463a638..aa1502fcb 100644
--- a/ppcls/configs/reid/MetaBIN_ResNet50_single_source.yaml
+++ b/ppcls/configs/reid/MetaBIN_ResNet50_single_source.yaml
@@ -1,7 +1,7 @@
 # global configs
 Global:
   checkpoints: null
-  pretrained_model: null #"metabin_resnet50_final"
+  pretrained_model: null
   output_dir: "./output/"
   device: "gpu"
   iter_per_epoch: &iter_per_epoch 50
@@ -29,7 +29,7 @@ Arch:
   name: "RecModel"
   Backbone:
     name: "ResNet50_metabin"
-    pretrained: False # "metabin_resnet50_backbone_pretrained"
+    pretrained: False
     bias_lr_factor: 2.0
   BackboneStopLayer:
     name: "flatten"
@@ -274,4 +274,4 @@ Metric:
   Eval:
     - Recallk:
         topk: [1, 5, 10]
-    - mAP: {}
\ No newline at end of file
+    - mAP: {}
diff --git a/ppcls/data/dataloader/metabin_sampler.py b/ppcls/data/dataloader/metabin_sampler.py
index 66f2009c0..f5cb29f75 100644
--- a/ppcls/data/dataloader/metabin_sampler.py
+++ b/ppcls/data/dataloader/metabin_sampler.py
@@ -22,6 +22,12 @@ from paddle.io import Sampler, BatchSampler
 class DomainShuffleSampler(Sampler):
     """
     Domain shuffle sampler
+    Args:
+        dataset (Dataset): Dataset for sampling.
+        batch_size (int): Number of examples in a batch.
+        num_instances (int): Number of instances per identity in a batch.
+        camera_to_domain (bool): If True, treat each camera as an individual domain.
+
     Code was heavily based on https://github.com/bismex/MetaBIN
     reference: https://arxiv.org/abs/2011.14670v2
     """
@@ -177,9 +183,9 @@ class NaiveIdentitySampler(Sampler):
     Randomly sample N identities, then for each identity,
     randomly sample K instances, therefore batch size is N*K.
     Args:
-    - data_source (list): list of (img_path, pid, camid).
-    - num_instances (int): number of instances per identity in a batch.
-    - batch_size (int): number of examples in a batch.
+        dataset (Dataset): Dataset for sampling.
+        batch_size (int): Number of examples in a batch.
+        num_instances (int): Number of instances per identity in a batch.
 
     Code was heavily based on https://github.com/bismex/MetaBIN
     reference: https://arxiv.org/abs/2011.14670v2
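
Note: both samplers above implement the same PK batching scheme: sample N identities at random, then K instances per identity, so batch_size = N * K. The following is a minimal plain-Python sketch of that scheme for readers unfamiliar with it; pk_batch and its arguments are illustrative names, not the PaddleClas sampler API.

import random
from collections import defaultdict

def pk_batch(labels, batch_size, num_instances):
    """Draw one PK batch: N = batch_size // K identities, K instances each.

    labels: per-sample identity ids, e.g. [0, 0, 1, 2, 2, 2, ...]
    """
    assert batch_size % num_instances == 0
    num_identities = batch_size // num_instances  # N
    index_by_pid = defaultdict(list)
    for idx, pid in enumerate(labels):
        index_by_pid[pid].append(idx)
    batch = []
    for pid in random.sample(list(index_by_pid), num_identities):
        pool = index_by_pid[pid]
        if len(pool) < num_instances:
            # identity has fewer than K instances: sample with replacement
            batch.extend(random.choices(pool, k=num_instances))
        else:
            batch.extend(random.sample(pool, num_instances))
    return batch
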
diff --git a/ppcls/loss/metabinloss.py b/ppcls/loss/metabinloss.py
index 90c5ef8c5..34159bdcd 100644
--- a/ppcls/loss/metabinloss.py
+++ b/ppcls/loss/metabinloss.py
@@ -36,12 +36,12 @@ def euclidean_dist(x, y):
 def hard_example_mining(dist_mat, is_pos, is_neg):
     """For each anchor, find the hardest positive and negative sample.
     Args:
-      dist_mat: pairwise distance between samples, shape [N, M]
-      is_pos: positive index with shape [N, M]
-      is_neg: negative index with shape [N, M]
+        dist_mat: pairwise distance between samples, shape [N, M]
+        is_pos: positive index with shape [N, M]
+        is_neg: negative index with shape [N, M]
     Returns:
-      dist_ap: distance(anchor, positive); shape [N, 1]
-      dist_an: distance(anchor, negative); shape [N, 1]
+        dist_ap: distance(anchor, positive); shape [N, 1]
+        dist_an: distance(anchor, negative); shape [N, 1]
     """
     inf = float("inf")
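
For context on the hunk above: hard_example_mining is the batch-hard mining step of the triplet loss. Below is a self-contained sketch of the same computation, assuming is_pos and is_neg are boolean masks; hard_example_mining_sketch is an illustrative name, not the repository's function.

import paddle

def hard_example_mining_sketch(dist_mat, is_pos, is_neg):
    """For each anchor, the hardest positive is the farthest same-identity
    sample; the hardest negative is the closest different-identity sample."""
    inf = float("inf")
    # mask out negative pairs with -inf so max() only sees positive pairs
    dist_ap = paddle.max(
        paddle.where(is_pos, dist_mat, paddle.full_like(dist_mat, -inf)),
        axis=1, keepdim=True)  # [N, 1]
    # mask out positive pairs with +inf so min() only sees negative pairs
    dist_an = paddle.min(
        paddle.where(is_neg, dist_mat, paddle.full_like(dist_mat, inf)),
        axis=1, keepdim=True)  # [N, 1]
    return dist_ap, dist_an
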
diff --git a/ppcls/optimizer/learning_rate.py b/ppcls/optimizer/learning_rate.py
index 87d5fd05e..d875d8bde 100644
--- a/ppcls/optimizer/learning_rate.py
+++ b/ppcls/optimizer/learning_rate.py
@@ -257,21 +257,21 @@ class Cyclic(LRBase):
     """Cyclic learning rate decay
     Args:
-        epochs (int): total epoch(s)
-        step_each_epoch (int): number of iterations within an epoch
+        epochs (int): Total epoch(s)
+        step_each_epoch (int): Number of iterations within an epoch
         base_learning_rate (float): Initial learning rate, which is the lower boundary in the cycle. The paper recommends
             that set the base_learning_rate to 1/3 or 1/4 of max_learning_rate.
         max_learning_rate (float): Maximum learning rate in the cycle. It defines the cycle amplitude as above.
             Since there is some scaling operation during process of learning rate adjustment,
             max_learning_rate may not actually be reached.
-        warmup_epoch (int): number of warmup epoch(s)
-        warmup_start_lr (float): start learning rate within warmup
+        warmup_epoch (int): Number of warmup epoch(s)
+        warmup_start_lr (float): Start learning rate within warmup
         step_size_up (int): Number of training steps, which is used to increase learning rate in a cycle.
             The step size of one cycle will be defined by step_size_up + step_size_down. According to the paper, step
             size should be set as at least 3 or 4 times steps in one epoch.
         step_size_down (int, optional): Number of training steps, which is used to decrease learning rate in a cycle.
             If not specified, it's value will initialize to `` step_size_up `` . Default: None
-        mode (str, optional): one of 'triangular', 'triangular2' or 'exp_range'.
+        mode (str, optional): One of 'triangular', 'triangular2' or 'exp_range'.
             If scale_fn is specified, this argument will be ignored. Default: 'triangular'
         exp_gamma (float): Constant in 'exp_range' scaling function: exp_gamma**iterations.
             Used only when mode = 'exp_range'. Default: 1.0
         scale_fn (function, optional): A custom scaling function, which is used to replace three build-in methods.
@@ -279,8 +279,8 @@ class Cyclic(LRBase):
             If specified, then 'mode' will be ignored. Default: None
         scale_mode (str, optional): One of 'cycle' or 'iterations'. Defines whether
             scale_fn is evaluated on cycle number or cycle iterations (total iterations since start of training). Default: 'cycle'
-        last_epoch (int, optional): The index of last epoch. Can be set to restart training.Default: -1, means initial learning rate.
-        by_epoch (bool): learning rate decays by epoch when by_epoch is True, else by iter
+        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+        by_epoch (bool): Learning rate decays by epoch when by_epoch is True, else by iter
         verbose: (bool, optional): If True, prints a message to stdout for each update. Defaults to False
     """
 
@@ -300,7 +300,6 @@ class Cyclic(LRBase):
                  by_epoch=False,
                  last_epoch=-1,
                  verbose=False):
-
         super(Cyclic, self).__init__(
             epochs, step_each_epoch, base_learning_rate, warmup_epoch,
             warmup_start_lr, last_epoch, by_epoch, verbose)
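
As a worked example of the 'triangular' mode documented above: a minimal sketch of the core schedule, ignoring warmup and assuming step_size_up == step_size_down. triangular_lr is an illustrative name, not the PaddleClas API.

import math

def triangular_lr(iteration, base_lr, max_lr, step_size):
    cycle = math.floor(1 + iteration / (2 * step_size))
    x = abs(iteration / step_size - 2 * cycle + 1)  # goes 1 -> 0 -> 1 over a cycle
    return base_lr + (max_lr - base_lr) * max(0.0, 1 - x)

# With step_size=50: lr climbs from base_lr at iteration 0 to max_lr at
# iteration 50, falls back to base_lr at iteration 100, then repeats.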