merge with upstream/develop

2021-05-08 11:16:41 +00:00 · 2021-05-08 11:16:41 +00:00 · 5170153ee3
parent a9bce0409d 7ba0c51fe6
commit 5170153ee3
6 changed files with 43 additions and 16 deletions
--- a/docs/en/extension/train_with_DALI_en.md
+++ b/docs/en/extension/train_with_DALI_en.md
@ -49,8 +49,14 @@ python -m paddle.distributed.launch \

 ## Train with FP16

-On the basis of the above, using FP16 half-precision can further improve the training speed, just add fields in the start training command `AMP.use_pure_fp16=True`:
+On the basis of the above, using FP16 half-precision can further improve the training speed. You can refer to the following command:

 ```shell
-python tools/static/train.py -c configs/ResNet/ResNet50.yaml -o use_dali=True -o AMP.use_pure_fp16=True
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export FLAGS_fraction_of_gpu_memory_to_use=0.8
+
+python -m paddle.distributed.launch \
+    --gpus="0,1,2,3,4,5,6,7" \
+    tools/static/train.py \
+        -c configs/ResNet/ResNet50_fp16.yaml
 ```
--- a/docs/zh_CN/extension/train_with_DALI.md
+++ b/docs/zh_CN/extension/train_with_DALI.md
@ -48,9 +48,14 @@ python -m paddle.distributed.launch \
 ```

 ## 使用FP16训练
-
-在上述基础上，使用FP16半精度训练，可以进一步提高速度，只需在启动训练命令中添加字段`AMP.use_pure_fp16=True`：
+在上述基础上，使用FP16半精度训练，可以进一步提高速度，可以参考下面的配置与运行命令。

 ```shell
-python tools/static/train.py -c configs/ResNet/ResNet50.yaml -o use_dali=True -o AMP.use_pure_fp16=True
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export FLAGS_fraction_of_gpu_memory_to_use=0.8
+
+python -m paddle.distributed.launch \
+    --gpus="0,1,2,3,4,5,6,7" \
+    tools/static/train.py \
+    -c configs/ResNet/ResNet50_fp16.yaml
 ```
--- a/ppcls/modeling/architectures/swin_transformer.py
+++ b/ppcls/modeling/architectures/swin_transformer.py
@ -63,7 +63,7 @@ def window_partition(x, window_size):
    return windows


-def window_reverse(windows, window_size, H, W):
+def window_reverse(windows, window_size, H, W, C):
    """
    Args:
        windows: (num_windows*B, window_size, window_size, C)
@ -74,10 +74,9 @@ def window_reverse(windows, window_size, H, W):
    Returns:
        x: (B, H, W, C)
    """
-    B = int(windows.shape[0] / (H * W / window_size / window_size))
    x = windows.reshape(
-        [B, H // window_size, W // window_size, window_size, window_size, -1])
-    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1])
+        [-1, H // window_size, W // window_size, window_size, window_size, C])
+    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C])
    return x


@ -334,8 +333,8 @@ class SwinTransformerBlock(nn.Layer):
        # merge windows
        attn_windows = attn_windows.reshape(
            [-1, self.window_size, self.window_size, C])
-        shifted_x = window_reverse(attn_windows, self.window_size, H,
-                                   W)  # B H' W' C
+        shifted_x = window_reverse(attn_windows, self.window_size, H, W,
+                                   C)  # B H' W' C

        # reverse cyclic shift
        if self.shift_size > 0:
@ -406,7 +405,7 @@ class PatchMerging(nn.Layer):
        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
        x = paddle.concat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
-        x = x.reshape([B, -1, 4 * C])  # B H/2*W/2 4*C
+        x = x.reshape([B, H * W // 4, 4 * C])  # B H/2*W/2 4*C

        x = self.norm(x)
        x = self.reduction(x)
@ -551,10 +550,8 @@ class PatchEmbed(nn.Layer):

    def forward(self, x):
        B, C, H, W = x.shape
-        # FIXME look at relaxing size constraints
-        assert H == self.img_size[0] and W == self.img_size[1], \
-            "Input image size ({H}*{W}) doesn't match model ({}*{}).".format(
-                H, W, self.img_size[0], self.img_size[1])
+        # TODO (littletomatodonkey): uncommenting the line below causes jit.save to fail
+        # assert [H, W] == self.img_size[:2], "Input image size ({H}*{W}) doesn't match model ({}*{}).".format(H, W, self.img_size[0], self.img_size[1])
        x = self.proj(x)

        x = x.flatten(2).transpose([0, 2, 1])  # B Ph*Pw C
--- a/tools/eval.py
+++ b/tools/eval.py
@ -72,6 +72,10 @@ def main(args, return_dict={}):

    init_model(config, net, optimizer=None)
    valid_dataloader = Reader(config, 'valid', places=place)()
+    if len(valid_dataloader) <= 0:
+        logger.error(
+            "valid dataloader is empty, please check your data config again!")
+        sys.exit(-1)
    net.eval()
    with paddle.no_grad():
        if not multilabel:
--- a/tools/export_model.py
+++ b/tools/export_model.py
@ -47,6 +47,12 @@ class Net(paddle.nn.Layer):
        self.pre_net = net(class_dim=class_dim)
        self.model = model

+    def eval(self):
+        self.training = False
+        for layer in self.sublayers():
+            layer.training = False
+            layer.eval()
+
    def forward(self, inputs):
        x = self.pre_net(inputs)
        if self.model == "GoogLeNet":
--- a/tools/train.py
+++ b/tools/train.py
@ -88,9 +88,18 @@ def main(args):
    init_model(config, net, optimizer)

    train_dataloader = Reader(config, 'train', places=place)()
+    if len(train_dataloader) <= 0:
+        logger.error(
+            "train dataloader is empty, please check your data config again!")
+        sys.exit(-1)

    if config.validate:
        valid_dataloader = Reader(config, 'valid', places=place)()
+        if len(valid_dataloader) <= 0:
+            logger.error(
+                "valid dataloader is empty, please check your data config again!"
+            )
+            sys.exit(-1)

    last_epoch_id = config.get("last_epoch", -1)
    best_top1_acc = 0.0  # best top1 acc record