change MultiScaleSamplerDDP into MultiScaleSampler

2022-01-05 12:46:29 +08:00 · 2022-01-05 12:46:29 +08:00 · b6cf278663
parent fe9f519b22
commit b6cf278663
5 changed files with 55 additions and 30 deletions
--- a/ppcls/configs/multi_scale/MobileNetV1_multi_scale.yaml
+++ b/ppcls/configs/multi_scale/MobileNetV1_multi_scale.yaml
@ -66,12 +66,14 @@ DataLoader:
            order: ''
            
    # support to specify width and height respectively:
-    # scales: [(160,160), (192,192), (256,256) (288,288) (320,320)]
+    # scales: [(160,160), (192,192), (224,225) (288,288) (320,320)]
    sampler:
-      name: MultiScaleSamplerDDP
-      scales: [160, 192, 256, 288, 320]
+      name: MultiScaleSampler
+      scales: [160, 192, 224, 288, 320]
+      # first_bs: batch size for the first image resolution in the scales list
+      # divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
      first_bs: 64
-      down_sample: 32
+      divided_factor: 32
      is_training: True

    loader:
--- a/ppcls/data/init.py
+++ b/ppcls/data/init.py
@ -34,7 +34,7 @@ from ppcls.data.dataloader.multi_scale_dataset import MultiScaleDataset
 from ppcls.data.dataloader.DistributedRandomIdentitySampler import DistributedRandomIdentitySampler
 from ppcls.data.dataloader.pk_sampler import PKSampler
 from ppcls.data.dataloader.mix_sampler import MixSampler
-from ppcls.data.dataloader.multi_scale_sampler import MultiScaleSamplerDDP
+from ppcls.data.dataloader.multi_scale_sampler import MultiScaleSampler
 from ppcls.data import preprocess
 from ppcls.data.preprocess import transform

--- a/ppcls/data/dataloader/init.py
+++ b/ppcls/data/dataloader/init.py
@ -7,5 +7,5 @@ from ppcls.data.dataloader.icartoon_dataset import ICartoonDataset
 from ppcls.data.dataloader.mix_dataset import MixDataset
 from ppcls.data.dataloader.multi_scale_dataset import MultiScaleDataset
 from ppcls.data.dataloader.mix_sampler import MixSampler
-from ppcls.data.dataloader.multi_scale_sampler import MultiScaleSamplerDDP
+from ppcls.data.dataloader.multi_scale_sampler import MultiScaleSampler
 from ppcls.data.dataloader.pk_sampler import PKSampler
--- a/ppcls/data/dataloader/multi_scale_dataset.py
+++ b/ppcls/data/dataloader/multi_scale_dataset.py
@ -41,6 +41,7 @@ class MultiScaleDataset(Dataset):
        self.images = []
        self.labels = []
        self._load_anno()
+        self.has_crop_flag = 1

    def _load_anno(self, seed=None):
        assert os.path.exists(self._cls_path)
@ -70,9 +71,15 @@ class MultiScaleDataset(Dataset):
                resize_op = ['RandCropImage', 'ResizeImage', 'CropImage']
                for resize in resize_op:
                    if resize in op:
-                        logger.error("Multi scale dataset will crop image according to the multi scale resolution")
-                        self.transform_ops[i][resize] = {'size': (img_height, img_width)}
+                        if self.has_crop_flag:
+                            logger.error(
+                                "Multi scale dataset will crop image according to the multi scale resolution"
+                            )
+                        self.transform_ops[i][resize] = {
+                            'size': (img_height, img_width)
+                        }
                        has_crop = True
+                        self.has_crop_flag = 0
        if has_crop == False:
            logger.error("Multi scale dateset requests RandCropImage")
            raise RuntimeError("Multi scale dateset requests RandCropImage")
@ -82,7 +89,7 @@ class MultiScaleDataset(Dataset):
            with open(self.images[index], 'rb') as f:
                img = f.read()
            if self._transform_ops:
-                img = transform(img, self._transform_ops) 
+                img = transform(img, self._transform_ops)
            img = img.transpose((2, 0, 1))
            return (img, self.labels[index])

--- a/ppcls/data/dataloader/multi_scale_sampler.py
+++ b/ppcls/data/dataloader/multi_scale_sampler.py
@ -7,8 +7,15 @@ import numpy as np

 from ppcls import data

-class MultiScaleSamplerDDP(Sampler):
-    def __init__(self, data_source, scales, first_bs, divided_factor=32, is_training = True, seed=None):
+
+class MultiScaleSampler(Sampler):
+    def __init__(self,
+                 data_source,
+                 scales,
+                 first_bs,
+                 divided_factor=32,
+                 is_training=True,
+                 seed=None):
        """
            multi scale samper
            Args:
@ -21,7 +28,7 @@ class MultiScaleSamplerDDP(Sampler):
        # min. and max. spatial dimensions
        self.data_source = data_source
        self.n_data_samples = len(self.data_source)
-        
+
        if isinstance(scales[0], tuple):
            width_dims = [i[0] for i in scales]
            height_dims = [i[1] for i in scales]
@ -31,12 +38,13 @@ class MultiScaleSamplerDDP(Sampler):
        base_im_w = width_dims[0]
        base_im_h = height_dims[0]
        base_batch_size = first_bs
-        
+
        # Get the GPU and node related information
-        num_replicas  =dist.get_world_size()
+        num_replicas = dist.get_world_size()
        rank = dist.get_rank()
        # adjust the total samples to avoid batch dropping
-        num_samples_per_replica = int(math.ceil(self.n_data_samples * 1.0 / num_replicas))
+        num_samples_per_replica = int(
+            math.ceil(self.n_data_samples * 1.0 / num_replicas))
        img_indices = [idx for idx in range(self.n_data_samples)]

        self.shuffle = False
@ -44,8 +52,13 @@ class MultiScaleSamplerDDP(Sampler):
            # compute the spatial dimensions and corresponding batch size
            # ImageNet models down-sample images by a factor of 32.
            # Ensure that width and height dimensions are multiples are multiple of 32.
-            width_dims = [int((w // divided_factor) * divided_factor) for w in width_dims]
-            height_dims = [int((h // divided_factor) * divided_factor) for h in height_dims]
+            width_dims = [
+                int((w // divided_factor) * divided_factor) for w in width_dims
+            ]
+            height_dims = [
+                int((h // divided_factor) * divided_factor)
+                for h in height_dims
+            ]

            img_batch_pairs = list()
            base_elements = base_im_w * base_im_h * base_batch_size
@ -55,8 +68,8 @@ class MultiScaleSamplerDDP(Sampler):
            self.img_batch_pairs = img_batch_pairs
            self.shuffle = True
        else:
-            self.img_batch_pairs = [(base_im_h , base_im_w , base_batch_size)]
-        
+            self.img_batch_pairs = [(base_im_h, base_im_w, base_batch_size)]
+
        self.img_indices = img_indices
        self.n_samples_per_replica = num_samples_per_replica
        self.epoch = 0
@ -65,21 +78,23 @@ class MultiScaleSamplerDDP(Sampler):
        self.seed = seed
        self.batch_list = []
        self.current = 0
-        indices_rank_i = self.img_indices[self.rank : len(self.img_indices) : self.num_replicas]
+        indices_rank_i = self.img_indices[self.rank:len(self.img_indices):
+                                          self.num_replicas]
        while self.current < self.n_samples_per_replica:
            curr_h, curr_w, curr_bsz = random.choice(self.img_batch_pairs)

-            end_index = min(self.current + curr_bsz, self.n_samples_per_replica)
+            end_index = min(self.current + curr_bsz,
+                            self.n_samples_per_replica)

            batch_ids = indices_rank_i[self.current:end_index]
            n_batch_samples = len(batch_ids)
            if n_batch_samples != curr_bsz:
-                    batch_ids += indices_rank_i[:(curr_bsz - n_batch_samples)]
+                batch_ids += indices_rank_i[:(curr_bsz - n_batch_samples)]
            self.current += curr_bsz

            if len(batch_ids) > 0:
-                    batch = [curr_h, curr_w, len(batch_ids)]
-                    self.batch_list.append(batch)
+                batch = [curr_h, curr_w, len(batch_ids)]
+                self.batch_list.append(batch)
        self.length = len(self.batch_list)

    def __iter__(self):
@ -90,9 +105,11 @@ class MultiScaleSamplerDDP(Sampler):
                random.seed(self.epoch)
            random.shuffle(self.img_indices)
            random.shuffle(self.img_batch_pairs)
-            indices_rank_i = self.img_indices[self.rank : len(self.img_indices) : self.num_replicas]
+            indices_rank_i = self.img_indices[self.rank:len(self.img_indices):
+                                              self.num_replicas]
        else:
-            indices_rank_i = self.img_indices[self.rank : len(self.img_indices) : self.num_replicas]
+            indices_rank_i = self.img_indices[self.rank:len(self.img_indices):
+                                              self.num_replicas]

        start_index = 0
        for batch_tuple in self.batch_list:
@ -101,16 +118,15 @@ class MultiScaleSamplerDDP(Sampler):
            batch_ids = indices_rank_i[start_index:end_index]
            n_batch_samples = len(batch_ids)
            if n_batch_samples != curr_bsz:
-                    batch_ids += indices_rank_i[:(curr_bsz - n_batch_samples)]
+                batch_ids += indices_rank_i[:(curr_bsz - n_batch_samples)]
            start_index += curr_bsz

            if len(batch_ids) > 0:
-                    batch = [(curr_h, curr_w, b_id) for b_id in batch_ids]
-                    yield batch
+                batch = [(curr_h, curr_w, b_id) for b_id in batch_ids]
+                yield batch

    def set_epoch(self, epoch: int):
        self.epoch = epoch

    def __len__(self):
        return self.length
-