polish code

2019-03-15 14:49:18 +00:00 · 2019-03-15 14:49:18 +00:00 · e2b1001165
parent 39fa787153
commit e2b1001165
16 changed files with 273 additions and 354 deletions
--- a/torchreid/datasets/bases.py
+++ b/torchreid/datasets/bases.py
@ -7,12 +7,17 @@ import numpy as np


 class BaseDataset(object):
-    """
-    Base class of reid dataset
-    """
+    """Base class of reid dataset"""
+    
    def __init__(self, root):
        self.root = osp.expanduser(root)

+    def check_before_run(self, required_files):
+        """Check if required files exist before going deeper.
+
+        Args:
+            required_files (str or list): a single path, or an iterable of
+                paths (files or directories), that must exist on disk.
+
+        Raises:
+            RuntimeError: if any of the given paths does not exist.
+        """
+        # A bare string would otherwise be iterated character by character,
+        # producing a misleading error such as '"d" is not found'.
+        if isinstance(required_files, str):
+            required_files = [required_files]
+
+        for f in required_files:
+            if not osp.exists(f):
+                raise RuntimeError('"{}" is not found'.format(f))
+
    def get_imagedata_info(self, data):
        pids, cams = [], []
        for _, pid, camid in data:
@ -45,16 +50,14 @@ class BaseDataset(object):


 class BaseImageDataset(BaseDataset):
-    """
-    Base class of image reid dataset
-    """
+    """Base class of image-reid dataset"""

    def print_dataset_statistics(self, train, query, gallery):
        num_train_pids, num_train_imgs, num_train_cams = self.get_imagedata_info(train)
        num_query_pids, num_query_imgs, num_query_cams = self.get_imagedata_info(query)
        num_gallery_pids, num_gallery_imgs, num_gallery_cams = self.get_imagedata_info(gallery)

-        print('Image Dataset statistics:')
+        print('=> Loaded {}'.format(self.__class__.__name__))
        print('  ----------------------------------------')
        print('  subset   | # ids | # images | # cameras')
        print('  ----------------------------------------')
@ -65,9 +68,7 @@ class BaseImageDataset(BaseDataset):


 class BaseVideoDataset(BaseDataset):
-    """
-    Base class of video reid dataset
-    """
+    """Base class of video-reid dataset"""

    def print_dataset_statistics(self, train, query, gallery):
        num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \
@ -84,7 +85,7 @@ class BaseVideoDataset(BaseDataset):
        max_num = np.max(tracklet_stats)
        avg_num = np.mean(tracklet_stats)

-        print('Video Dataset statistics:')
+        print('=> Loaded {}'.format(self.__class__.__name__))
        print('  -------------------------------------------')
        print('  subset   | # ids | # tracklets | # cameras')
        print('  -------------------------------------------')
--- a/torchreid/datasets/cuhk01.py
+++ b/torchreid/datasets/cuhk01.py
@ -20,8 +20,7 @@ from .bases import BaseImageDataset


 class CUHK01(BaseImageDataset):
-    """
-    CUHK01
+    """CUHK01

    Reference:
    Li et al. Human Reidentification with Transferred Metric Learning. ACCV 2012.
@ -43,7 +42,12 @@ class CUHK01(BaseImageDataset):
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        self.extract_file()
-        self.check_before_run()
+        
+        required_files = [
+            self.dataset_dir,
+            self.campus_dir
+        ]
+        self.check_before_run(required_files)

        self.prepare_split()
        splits = read_json(self.split_path)
@ -60,7 +64,6 @@ class CUHK01(BaseImageDataset):
        gallery = [tuple(item) for item in gallery]

        if verbose:
-            print('=> CUHK01 loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -77,14 +80,6 @@ class CUHK01(BaseImageDataset):
            zip_ref = zipfile.ZipFile(self.zip_path, 'r')
            zip_ref.extractall(self.dataset_dir)
            zip_ref.close()
-        print('Files extracted')
-
-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.campus_dir):
-            raise RuntimeError('"{}" is not available'.format(self.campus_dir))

    def prepare_split(self):
        """
@ -116,7 +111,6 @@ class CUHK01(BaseImageDataset):
                idx2label = {idx: label for label, idx in enumerate(train_idxs)}

                train, test_a, test_b = [], [], []
-
                for img_path, pid, camid in img_list:
                    if pid in train_idxs:
                        train.append((img_path, idx2label[pid], camid))
@ -127,23 +121,27 @@ class CUHK01(BaseImageDataset):
                            test_b.append((img_path, pid, camid))

                # use cameraA as query and cameraB as gallery
-                split = {'train': train, 'query': test_a, 'gallery': test_b,
-                         'num_train_pids': num_train_pids,
-                         'num_query_pids': num_pids - num_train_pids,
-                         'num_gallery_pids': num_pids - num_train_pids,
-                         }
+                split = {
+                    'train': train,
+                    'query': test_a,
+                    'gallery': test_b,
+                    'num_train_pids': num_train_pids,
+                    'num_query_pids': num_pids - num_train_pids,
+                    'num_gallery_pids': num_pids - num_train_pids
+                }
                splits.append(split)

                # use cameraB as query and cameraA as gallery
-                split = {'train': train, 'query': test_b, 'gallery': test_a,
-                         'num_train_pids': num_train_pids,
-                         'num_query_pids': num_pids - num_train_pids,
-                         'num_gallery_pids': num_pids - num_train_pids,
-                         }
+                split = {
+                    'train': train,
+                    'query': test_b,
+                    'gallery': test_a,
+                    'num_train_pids': num_train_pids,
+                    'num_query_pids': num_pids - num_train_pids,
+                    'num_gallery_pids': num_pids - num_train_pids
+                }
                splits.append(split)

            print('Totally {} splits are created'.format(len(splits)))
            write_json(splits, self.split_path)
-            print('Split file saved to {}'.format(self.split_path))
-
-        print('Splits created')
+            print('Split file saved to {}'.format(self.split_path))
--- a/torchreid/datasets/cuhk03.py
+++ b/torchreid/datasets/cuhk03.py
@ -20,8 +20,7 @@ from .bases import BaseImageDataset


 class CUHK03(BaseImageDataset):
-    """
-    CUHK03
+    """CUHK03

    Reference:
    Li et al. DeepReID: Deep Filter Pairing Neural Network for Person Re-identification. CVPR 2014.
@ -58,27 +57,31 @@ class CUHK03(BaseImageDataset):
        self.split_new_det_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat')
        self.split_new_lab_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat')

-        self.check_before_run()
-        self.preprocess()
+        required_files = [
+            self.dataset_dir,
+            self.data_dir,
+            self.raw_mat_path,
+            self.split_new_det_mat_path,
+            self.split_new_lab_mat_path
+        ]
+        self.check_before_run(required_files)
+
+        self.preprocess_split()

        if cuhk03_labeled:
-            image_type = 'labeled'
            split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
        else:
-            image_type = 'detected'
            split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path

        splits = read_json(split_path)
        assert split_id < len(splits), 'Condition split_id ({}) < len(splits) ({}) is false'.format(split_id, len(splits))
        split = splits[split_id]
-        print('Split index = {}'.format(split_id))

        train = split['train']
        query = split['query']
        gallery = split['gallery']

        if verbose:
-            print('=> CUHK03 ({}) loaded'.format(image_type))
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -89,39 +92,25 @@ class CUHK03(BaseImageDataset):
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.data_dir):
-            raise RuntimeError('"{}" is not available'.format(self.data_dir))
-        if not osp.exists(self.raw_mat_path):
-            raise RuntimeError('"{}" is not available'.format(self.raw_mat_path))
-        if not osp.exists(self.split_new_det_mat_path):
-            raise RuntimeError('"{}" is not available'.format(self.split_new_det_mat_path))
-        if not osp.exists(self.split_new_lab_mat_path):
-            raise RuntimeError('"{}" is not available'.format(self.split_new_lab_mat_path))
-
-    def preprocess(self):
+    def preprocess_split(self):
        """
        This function is a bit complex and ugly, what it does is
-        1. Extract data from cuhk-03.mat and save as png images.
-        2. Create 20 classic splits. (Li et al. CVPR'14)
-        3. Create new split. (Zhong et al. CVPR'17)
+        1. extract data from cuhk-03.mat and save as png images
+        2. create 20 classic splits (Li et al. CVPR'14)
+        3. create new split (Zhong et al. CVPR'17)
        """
-        print('Note: if root path is changed, the previously generated json files need to be re-generated (delete them first)')
-        if osp.exists(self.imgs_labeled_dir) and \
-           osp.exists(self.imgs_detected_dir) and \
-           osp.exists(self.split_classic_det_json_path) and \
-           osp.exists(self.split_classic_lab_json_path) and \
-           osp.exists(self.split_new_det_json_path) and \
-           osp.exists(self.split_new_lab_json_path):
-            return
+        if osp.exists(self.imgs_labeled_dir) \
+           and osp.exists(self.imgs_detected_dir) \
+           and osp.exists(self.split_classic_det_json_path) \
+           and osp.exists(self.split_classic_lab_json_path) \
+           and osp.exists(self.split_new_det_json_path) \
+           and osp.exists(self.split_new_lab_json_path):
+           return

        mkdir_if_missing(self.imgs_detected_dir)
        mkdir_if_missing(self.imgs_labeled_dir)

-        print('Extract image data from {} and save as png'.format(self.raw_mat_path))
+        print('Extract image data from "{}" and save as png'.format(self.raw_mat_path))
        mat = h5py.File(self.raw_mat_path, 'r')

        def _deref(ref):
@ -131,14 +120,14 @@ class CUHK03(BaseImageDataset):
            img_paths = [] # Note: some persons only have images for one view
            for imgid, img_ref in enumerate(img_refs):
                img = _deref(img_ref)
-                # skip empty cell
-                if img.size == 0 or img.ndim < 3: continue
+                if img.size==0 or img.ndim<3:
+                    continue # skip empty cell
                # images are saved with the following format, index-1 (ensure uniqueness)
                # campid: index of camera pair (1-5)
                # pid: index of person in 'campid'-th camera pair
                # viewid: index of view, {1, 2}
                # imgid: index of image, (1-10)
-                viewid = 1 if imgid < 5 else 2
+                viewid = 1 if imgid<5 else 2
                img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid+1, pid+1, viewid, imgid+1)
                img_path = osp.join(save_dir, img_name)
                if not osp.isfile(img_path):
@ -146,11 +135,11 @@ class CUHK03(BaseImageDataset):
                img_paths.append(img_path)
            return img_paths

-        def _extract_img(name):
-            print('Processing {} images (extract and save) ...'.format(name))
+        def _extract_img(image_type):
+            print('Processing {} images ...'.format(image_type))
            meta_data = []
-            imgs_dir = self.imgs_detected_dir if name == 'detected' else self.imgs_labeled_dir
-            for campid, camp_ref in enumerate(mat[name][0]):
+            imgs_dir = self.imgs_detected_dir if image_type=='detected' else self.imgs_labeled_dir
+            for campid, camp_ref in enumerate(mat[image_type][0]):
                camp = _deref(camp_ref)
                num_pids = camp.shape[0]
                for pid in range(num_pids):
@ -218,7 +207,8 @@ class CUHK03(BaseImageDataset):
                img_name = filelist[idx][0]
                camid = int(img_name.split('_')[2]) - 1 # make it 0-based
                pid = pids[idx]
-                if relabel: pid = pid2label[pid]
+                if relabel:
+                    pid = pid2label[pid]
                img_path = osp.join(img_dir, img_name)
                tmp_set.append((img_path, int(pid), camid))
                unique_pids.add(pid)
@ -237,28 +227,38 @@ class CUHK03(BaseImageDataset):
            gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False)
            return train_info, query_info, gallery_info

-        print('Creating new splits for detected images (767/700) ...')
+        print('Creating new split for detected images (767/700) ...')
        train_info, query_info, gallery_info = _extract_new_split(
            loadmat(self.split_new_det_mat_path),
            self.imgs_detected_dir,
        )
-        splits = [{
-            'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0],
-            'num_train_pids': train_info[1], 'num_train_imgs': train_info[2],
-            'num_query_pids': query_info[1], 'num_query_imgs': query_info[2],
-            'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2],
+        split = [{
+            'train': train_info[0],
+            'query': query_info[0],
+            'gallery': gallery_info[0],
+            'num_train_pids': train_info[1],
+            'num_train_imgs': train_info[2],
+            'num_query_pids': query_info[1],
+            'num_query_imgs': query_info[2],
+            'num_gallery_pids': gallery_info[1],
+            'num_gallery_imgs': gallery_info[2],
        }]
-        write_json(splits, self.split_new_det_json_path)
+        write_json(split, self.split_new_det_json_path)

-        print('Creating new splits for labeled images (767/700) ...')
+        print('Creating new split for labeled images (767/700) ...')
        train_info, query_info, gallery_info = _extract_new_split(
            loadmat(self.split_new_lab_mat_path),
            self.imgs_labeled_dir,
        )
-        splits = [{
-            'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0],
-            'num_train_pids': train_info[1], 'num_train_imgs': train_info[2],
-            'num_query_pids': query_info[1], 'num_query_imgs': query_info[2],
-            'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2],
+        split = [{
+            'train': train_info[0],
+            'query': query_info[0],
+            'gallery': gallery_info[0],
+            'num_train_pids': train_info[1],
+            'num_train_imgs': train_info[2],
+            'num_query_pids': query_info[1],
+            'num_query_imgs': query_info[2],
+            'num_gallery_pids': gallery_info[1],
+            'num_gallery_imgs': gallery_info[2],
        }]
-        write_json(splits, self.split_new_lab_json_path)
+        write_json(split, self.split_new_lab_json_path)
--- a/torchreid/datasets/dukemtmcreid.py
+++ b/torchreid/datasets/dukemtmcreid.py
@ -20,8 +20,7 @@ from .bases import BaseImageDataset


 class DukeMTMCreID(BaseImageDataset):
-    """
-    DukeMTMC-reID
+    """DukeMTMC-reID

    Reference:
    1. Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016.
@ -45,14 +44,20 @@ class DukeMTMCreID(BaseImageDataset):
        self.gallery_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/bounding_box_test')

        self.download_data()
-        self.check_before_run()
+        
+        required_files = [
+            self.dataset_dir,
+            self.train_dir,
+            self.query_dir,
+            self.gallery_dir
+        ]
+        self.check_before_run(required_files)

        train = self.process_dir(self.train_dir, relabel=True)
        query = self.process_dir(self.query_dir, relabel=False)
        gallery = self.process_dir(self.gallery_dir, relabel=False)

        if verbose:
-            print('=> DukeMTMC-reID loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -65,7 +70,6 @@ class DukeMTMCreID(BaseImageDataset):

    def download_data(self):
        if osp.exists(self.dataset_dir):
-            print('This dataset has been downloaded.')
            return

        print('Creating directory {}'.format(self.dataset_dir))
@ -80,17 +84,6 @@ class DukeMTMCreID(BaseImageDataset):
        zip_ref.extractall(self.dataset_dir)
        zip_ref.close()

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.train_dir):
-            raise RuntimeError('"{}" is not available'.format(self.train_dir))
-        if not osp.exists(self.query_dir):
-            raise RuntimeError('"{}" is not available'.format(self.query_dir))
-        if not osp.exists(self.gallery_dir):
-            raise RuntimeError('"{}" is not available'.format(self.gallery_dir))
-
    def process_dir(self, dir_path, relabel=False):
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        pattern = re.compile(r'([-\d]+)_c(\d)')
--- a/torchreid/datasets/dukemtmcvidreid.py
+++ b/torchreid/datasets/dukemtmcvidreid.py
@ -20,8 +20,7 @@ from .bases import BaseVideoDataset


 class DukeMTMCVidReID(BaseVideoDataset):
-    """
-    DukeMTMCVidReID
+    """DukeMTMCVidReID

    Reference:
    Wu et al. Exploit the Unknown Gradually: One-Shot Video-Based Person
@ -45,18 +44,23 @@ class DukeMTMCVidReID(BaseVideoDataset):
        self.split_train_json_path = osp.join(self.dataset_dir, 'split_train.json')
        self.split_query_json_path = osp.join(self.dataset_dir, 'split_query.json')
        self.split_gallery_json_path = osp.join(self.dataset_dir, 'split_gallery.json')
-
        self.min_seq_len = min_seq_len
+
        self.download_data()
-        self.check_before_run()
-        print('Note: if root path is changed, the previously generated json files need to be re-generated (so delete them first)')
+
+        required_files = [
+            self.dataset_dir,
+            self.train_dir,
+            self.query_dir,
+            self.gallery_dir
+        ]
+        self.check_before_run(required_files)

        train = self.process_dir(self.train_dir, self.split_train_json_path, relabel=True)
        query = self.process_dir(self.query_dir, self.split_query_json_path, relabel=False)
        gallery = self.process_dir(self.gallery_dir, self.split_gallery_json_path, relabel=False)

        if verbose:
-            print('=> DukeMTMC-VideoReID loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -69,7 +73,6 @@ class DukeMTMCVidReID(BaseVideoDataset):

    def download_data(self):
        if osp.exists(self.dataset_dir):
-            print('This dataset has been downloaded.')
            return

        print('Creating directory {}'.format(self.dataset_dir))
@ -84,24 +87,12 @@ class DukeMTMCVidReID(BaseVideoDataset):
        zip_ref.extractall(self.dataset_dir)
        zip_ref.close()

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.train_dir):
-            raise RuntimeError('"{}" is not available'.format(self.train_dir))
-        if not osp.exists(self.query_dir):
-            raise RuntimeError('"{}" is not available'.format(self.query_dir))
-        if not osp.exists(self.gallery_dir):
-            raise RuntimeError('"{}" is not available'.format(self.gallery_dir))
-
    def process_dir(self, dir_path, json_path, relabel):
        if osp.exists(json_path):
-            print('=> {} generated before, awesome!'.format(json_path))
            split = read_json(json_path)
            return split['tracklets']

-        print('=> Automatically generating split (might take a while for the first time, have a coffe)')
+        print('=> Generating split json file (** this might take a while **)')
        pdirs = glob.glob(osp.join(dir_path, '*')) # avoid .DS_Store
        print('Processing "{}" with {} person identities'.format(dir_path, len(pdirs)))

@ -114,7 +105,8 @@ class DukeMTMCVidReID(BaseVideoDataset):
        tracklets = []
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
-            if relabel: pid = pid2label[pid]
+            if relabel:
+                pid = pid2label[pid]
            tdirs = glob.glob(osp.join(pdir, '*'))
            for tdir in tdirs:
                raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
--- a/torchreid/datasets/grid.py
+++ b/torchreid/datasets/grid.py
@ -20,8 +20,7 @@ from .bases import BaseImageDataset


 class GRID(BaseImageDataset):
-    """
-    GRID
+    """GRID

    Reference:
    Loy et al. Multi-camera activity correlation analysis. CVPR 2009.
@ -45,7 +44,14 @@ class GRID(BaseImageDataset):
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        self.download_data()
-        self.check_before_run()
+        
+        required_files = [
+            self.dataset_dir,
+            self.probe_path,
+            self.gallery_path,
+            self.split_mat_path
+        ]
+        self.check_before_run(required_files)

        self.prepare_split()
        splits = read_json(self.split_path)
@ -62,7 +68,6 @@ class GRID(BaseImageDataset):
        gallery = [tuple(item) for item in gallery]

        if verbose:
-            print('=> GRID loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -73,20 +78,8 @@ class GRID(BaseImageDataset):
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.probe_path):
-            raise RuntimeError('"{}" is not available'.format(self.probe_path))
-        if not osp.exists(self.gallery_path):
-            raise RuntimeError('"{}" is not available'.format(self.gallery_path))
-        if not osp.exists(self.split_mat_path):
-            raise RuntimeError('"{}" is not available'.format(self.split_mat_path))
-
    def download_data(self):
        if osp.exists(self.dataset_dir):
-            print('This dataset has been downloaded.')
            return

        print('Creating directory {}'.format(self.dataset_dir))
@ -123,10 +116,8 @@ class GRID(BaseImageDataset):
                    img_idx = int(img_name.split('_')[0])
                    camid = int(img_name.split('_')[1]) - 1 # index starts from 0
                    if img_idx in train_idxs:
-                        # add to train data
                        train.append((img_path, idx2label[img_idx], camid))
                    else:
-                        # add to query data
                        query.append((img_path, img_idx, camid))
                
                # process gallery folder
@ -135,21 +126,20 @@ class GRID(BaseImageDataset):
                    img_idx = int(img_name.split('_')[0])
                    camid = int(img_name.split('_')[1]) - 1 # index starts from 0
                    if img_idx in train_idxs:
-                        # add to train data
                        train.append((img_path, idx2label[img_idx], camid))
                    else:
-                        # add to gallery data
                        gallery.append((img_path, img_idx, camid))

-                split = {'train': train, 'query': query, 'gallery': gallery,
-                         'num_train_pids': 125,
-                         'num_query_pids': 125,
-                         'num_gallery_pids': 900,
-                         }
+                split = {
+                    'train': train,
+                    'query': query,
+                    'gallery': gallery,
+                    'num_train_pids': 125,
+                    'num_query_pids': 125,
+                    'num_gallery_pids': 900
+                }
                splits.append(split)
            
            print('Totally {} splits are created'.format(len(splits)))
            write_json(splits, self.split_path)
-            print('Split file saved to {}'.format(self.split_path))
-
-        print('Splits created')
+            print('Split file saved to {}'.format(self.split_path))
--- a/torchreid/datasets/ilids.py
+++ b/torchreid/datasets/ilids.py
@ -23,8 +23,7 @@ from .bases import BaseImageDataset


 class iLIDS(BaseImageDataset):
-    """
-    QMUL-iLIDS
+    """QMUL-iLIDS

    Reference:
    Zheng et al. Associating Groups of People. BMVC 2009.
@ -44,7 +43,12 @@ class iLIDS(BaseImageDataset):
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        self.download_data()
-        self.check_before_run()
+        
+        required_files = [
+            self.dataset_dir,
+            self.data_dir
+        ]
+        self.check_before_run(required_files)

        self.prepare_split()
        splits = read_json(self.split_path)
@ -55,7 +59,6 @@ class iLIDS(BaseImageDataset):
        train, query, gallery = self.process_split(split)

        if verbose:
-            print('=> iLIDS loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -68,7 +71,6 @@ class iLIDS(BaseImageDataset):

    def download_data(self):
        if osp.exists(self.dataset_dir):
-            print('This dataset has been downloaded.')
            return

        mkdir_if_missing(self.dataset_dir)
@ -82,18 +84,10 @@ class iLIDS(BaseImageDataset):
        tar.extractall(path=self.dataset_dir)
        tar.close()

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.data_dir):
-            raise RuntimeError('"{}" is not available'.format(self.data_dir))
-
    def prepare_split(self):
        if not osp.exists(self.split_path):
            print('Creating splits ...')
            
-            # read image paths
            paths = glob.glob(osp.join(self.data_dir, '*.jpg'))
            img_names = [osp.basename(path) for path in paths]
            num_imgs = len(img_names)
@ -122,7 +116,6 @@ class iLIDS(BaseImageDataset):
                train_pids = pids_copy[:num_train_pids]
                test_pids = pids_copy[num_train_pids:]

-                # store image names
                train = []
                query = []
                gallery = []
--- a/torchreid/datasets/ilidsvid.py
+++ b/torchreid/datasets/ilidsvid.py
@ -20,8 +20,7 @@ from .bases import BaseVideoDataset


 class iLIDSVID(BaseVideoDataset):
-    """
-    iLIDS-VID
+    """iLIDS-VID

    Reference:
    Wang et al. Person Re-Identification by Video Ranking. ECCV 2014.
@ -47,7 +46,13 @@ class iLIDSVID(BaseVideoDataset):
        self.cam_2_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam2')

        self.download_data()
-        self.check_before_run()
+        
+        required_files = [
+            self.dataset_dir,
+            self.data_dir,
+            self.split_dir
+        ]
+        self.check_before_run(required_files)

        self.prepare_split()
        splits = read_json(self.split_path)
@ -55,14 +60,12 @@ class iLIDSVID(BaseVideoDataset):
            raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1))
        split = splits[split_id]
        train_dirs, test_dirs = split['train'], split['test']
-        print('# train identites: {}, # test identites {}'.format(len(train_dirs), len(test_dirs)))

        train = self.process_data(train_dirs, cam1=True, cam2=True)
        query = self.process_data(test_dirs, cam1=True, cam2=False)
        gallery = self.process_data(test_dirs, cam1=False, cam2=True)

        if verbose:
-            print('=> iLIDS-VID loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -75,7 +78,6 @@ class iLIDSVID(BaseVideoDataset):

    def download_data(self):
        if osp.exists(self.dataset_dir):
-            print('This dataset has been downloaded.')
            return

        mkdir_if_missing(self.dataset_dir)
@ -89,15 +91,6 @@ class iLIDSVID(BaseVideoDataset):
        tar.extractall(path=self.dataset_dir)
        tar.close()

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.data_dir):
-            raise RuntimeError('"{}" is not available'.format(self.data_dir))
-        if not osp.exists(self.split_dir):
-            raise RuntimeError('"{}" is not available'.format(self.split_dir))
-
    def prepare_split(self):
        if not osp.exists(self.split_path):
            print('Creating splits ...')
@ -140,8 +133,6 @@ class iLIDSVID(BaseVideoDataset):
            print('Split file is saved to {}'.format(self.split_path))
            write_json(splits, self.split_path)

-        print('Splits created')
-
    def process_data(self, dirnames, cam1=True, cam2=True):
        tracklets = []
        dirname2pid = {dirname:i for i, dirname in enumerate(dirnames)}
--- a/torchreid/datasets/market1501.py
+++ b/torchreid/datasets/market1501.py
@ -19,8 +19,7 @@ from .bases import BaseImageDataset


 class Market1501(BaseImageDataset):
-    """
-    Market1501
+    """Market1501

    Reference:
    Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015.
@ -42,7 +41,15 @@ class Market1501(BaseImageDataset):
        self.extra_gallery_dir = osp.join(self.dataset_dir, 'images')
        self.market1501_500k = market1501_500k

-        self.check_before_run()
+        required_files = [
+            self.dataset_dir,
+            self.train_dir,
+            self.query_dir,
+            self.gallery_dir
+        ]
+        if self.market1501_500k:
+            required_files.append(self.extra_gallery_dir)
+        self.check_before_run(required_files)

        train = self.process_dir(self.train_dir, relabel=True)
        query = self.process_dir(self.query_dir, relabel=False)
@ -51,7 +58,6 @@ class Market1501(BaseImageDataset):
            gallery += self.process_dir(self.extra_gallery_dir, relabel=False)

        if verbose:
-            print('=> Market1501 loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -62,19 +68,6 @@ class Market1501(BaseImageDataset):
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.train_dir):
-            raise RuntimeError('"{}" is not available'.format(self.train_dir))
-        if not osp.exists(self.query_dir):
-            raise RuntimeError('"{}" is not available'.format(self.query_dir))
-        if not osp.exists(self.gallery_dir):
-            raise RuntimeError('"{}" is not available'.format(self.gallery_dir))
-        if self.market1501_500k and not osp.exists(self.extra_gallery_dir):
-            raise RuntimeError('"{}" is not available'.format(self.extra_gallery_dir))
-
    def process_dir(self, dir_path, relabel=False):
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        pattern = re.compile(r'([-\d]+)_c(\d)')
@ -82,18 +75,21 @@ class Market1501(BaseImageDataset):
        pid_container = set()
        for img_path in img_paths:
            pid, _ = map(int, pattern.search(img_path).groups())
-            if pid == -1: continue  # junk images are just ignored
+            if pid == -1:
+                continue # junk images are just ignored
            pid_container.add(pid)
        pid2label = {pid:label for label, pid in enumerate(pid_container)}

        dataset = []
        for img_path in img_paths:
            pid, camid = map(int, pattern.search(img_path).groups())
-            if pid == -1: continue  # junk images are just ignored
+            if pid == -1:
+                continue # junk images are just ignored
            assert 0 <= pid <= 1501  # pid == 0 means background
            assert 1 <= camid <= 6
            camid -= 1 # index starts from 0
-            if relabel: pid = pid2label[pid]
+            if relabel:
+                pid = pid2label[pid]
            dataset.append((img_path, pid, camid))

        return dataset
--- a/torchreid/datasets/mars.py
+++ b/torchreid/datasets/mars.py
@ -19,8 +19,7 @@ from .bases import BaseVideoDataset


 class Mars(BaseVideoDataset):
-    """
-    MARS
+    """MARS

    Reference:
    Zheng et al. MARS: A Video Benchmark for Large-Scale Person Re-identification. ECCV 2016.
@ -43,9 +42,16 @@ class Mars(BaseVideoDataset):
        self.track_test_info_path = osp.join(self.dataset_dir, 'info/tracks_test_info.mat')
        self.query_IDX_path = osp.join(self.dataset_dir, 'info/query_IDX.mat')

-        self.check_before_run()
+        required_files = [
+            self.dataset_dir,
+            self.train_name_path,
+            self.test_name_path,
+            self.track_train_info_path,
+            self.track_test_info_path,
+            self.query_IDX_path
+        ]
+        self.check_before_run(required_files)

-        # prepare meta data
        train_names = self.get_names(self.train_name_path)
        test_names = self.get_names(self.test_name_path)
        track_train = loadmat(self.track_train_info_path)['track_train_info'] # numpy.ndarray (8298, 4)
@ -61,7 +67,6 @@ class Mars(BaseVideoDataset):
        gallery = self.process_data(test_names, track_gallery, home_dir='bbox_test', relabel=False, min_seq_len=min_seq_len)

        if verbose:
-            print('=> MARS loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -72,21 +77,6 @@ class Mars(BaseVideoDataset):
        self.num_query_pids, _, self.num_query_cams = self.get_videodata_info(self.query)
        self.num_gallery_pids, _, self.num_gallery_cams = self.get_videodata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.train_name_path):
-            raise RuntimeError('"{}" is not available'.format(self.train_name_path))
-        if not osp.exists(self.test_name_path):
-            raise RuntimeError('"{}" is not available'.format(self.test_name_path))
-        if not osp.exists(self.track_train_info_path):
-            raise RuntimeError('"{}" is not available'.format(self.track_train_info_path))
-        if not osp.exists(self.track_test_info_path):
-            raise RuntimeError('"{}" is not available'.format(self.track_test_info_path))
-        if not osp.exists(self.query_IDX_path):
-            raise RuntimeError('"{}" is not available'.format(self.query_IDX_path))
-
    def get_names(self, fpath):
        names = []
        with open(fpath, 'r') as f:
@ -107,7 +97,8 @@ class Mars(BaseVideoDataset):
        for tracklet_idx in range(num_tracklets):
            data = meta_data[tracklet_idx,...]
            start_index, end_index, pid, camid = data
-            if pid == -1: continue # junk images are just ignored
+            if pid == -1:
+                continue # junk images are just ignored
            assert 1 <= camid <= 6
            if relabel: pid = pid2label[pid]
            camid -= 1 # index starts from 0
--- a/torchreid/datasets/msmt17.py
+++ b/torchreid/datasets/msmt17.py
@ -21,23 +21,22 @@ from .bases import BaseImageDataset
 # To adapt to different versions
 # Log:
 # 22.01.2019: v1 and v2 only differ in dir names
-_TRAIN_DIR_KEY = 'train_dir'
-_TEST_DIR_KEY = 'test_dir'
-_VERSION = {
+TRAIN_DIR_KEY = 'train_dir'
+TEST_DIR_KEY = 'test_dir'
+VERSION_DICT = {
    'MSMT17_V1': {
-        _TRAIN_DIR_KEY: 'train',
-        _TEST_DIR_KEY: 'test',
+        TRAIN_DIR_KEY: 'train',
+        TEST_DIR_KEY: 'test',
    },
    'MSMT17_V2': {
-        _TRAIN_DIR_KEY: 'mask_train_v2',
-        _TEST_DIR_KEY: 'mask_test_v2',
+        TRAIN_DIR_KEY: 'mask_train_v2',
+        TEST_DIR_KEY: 'mask_test_v2',
    }
 }


 class MSMT17(BaseImageDataset):
-    """
-    MSMT17
+    """MSMT17

    Reference:
    Wei et al. Person Transfer GAN to Bridge Domain Gap for Person Re-Identification. CVPR 2018.
@ -55,10 +54,10 @@ class MSMT17(BaseImageDataset):
        super(MSMT17, self).__init__(root)
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        has_main_dir = False
-        for main_dir in _VERSION:
+        for main_dir in VERSION_DICT:
            if osp.exists(osp.join(self.dataset_dir, main_dir)):
-                train_dir = _VERSION[main_dir][_TRAIN_DIR_KEY]
-                test_dir = _VERSION[main_dir][_TEST_DIR_KEY]
+                train_dir = VERSION_DICT[main_dir][TRAIN_DIR_KEY]
+                test_dir = VERSION_DICT[main_dir][TEST_DIR_KEY]
                has_main_dir = True
                break
        assert has_main_dir, 'Dataset folder not found'
@ -69,9 +68,15 @@ class MSMT17(BaseImageDataset):
        self.list_query_path = osp.join(self.dataset_dir, main_dir, 'list_query.txt')
        self.list_gallery_path = osp.join(self.dataset_dir, main_dir, 'list_gallery.txt')

-        self.check_before_run()
+        required_files = [
+            self.dataset_dir,
+            self.train_dir,
+            self.test_dir
+        ]
+        self.check_before_run(required_files)
+
        train = self.process_dir(self.train_dir, self.list_train_path)
-        #val = self.process_dir(self.train_dir, self.list_val_path)
+        val = self.process_dir(self.train_dir, self.list_val_path)
        query = self.process_dir(self.test_dir, self.list_query_path)
        gallery = self.process_dir(self.test_dir, self.list_gallery_path)

@ -80,7 +85,6 @@ class MSMT17(BaseImageDataset):
        #num_train_imgs += num_val_imgs

        if verbose:
-            print('=> MSMT17 loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -91,15 +95,6 @@ class MSMT17(BaseImageDataset):
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.train_dir):
-            raise RuntimeError('"{}" is not available'.format(self.train_dir))
-        if not osp.exists(self.test_dir):
-            raise RuntimeError('"{}" is not available'.format(self.test_dir))
-
    def process_dir(self, dir_path, list_path):
        with open(list_path, 'r') as txt:
            lines = txt.readlines()
@ -113,7 +108,7 @@ class MSMT17(BaseImageDataset):
            dataset.append((img_path, pid, camid))
            pid_container.add(pid)
        num_pids = len(pid_container)
-        # check if pid starts from 0 and increments with 1
        for idx, pid in enumerate(pid_container):
-            assert idx == pid, 'See code comment for explanation'
+            if idx != pid:
+                raise RuntimeError('pid does not start from 0 and increment by 1')
        return dataset
--- a/torchreid/datasets/prid.py
+++ b/torchreid/datasets/prid.py
@ -23,8 +23,7 @@ from .bases import BaseImageDataset


 class PRID(BaseImageDataset):
-    """
-    PRID (single-shot version of prid-2011)
+    """PRID (single-shot version of prid-2011)

    Reference:
    Hirzer et al. Person Re-Identification by Descriptive and Discriminative Classification. SCIA 2011.
@ -46,7 +45,12 @@ class PRID(BaseImageDataset):
        self.cam_b_dir = osp.join(self.dataset_dir, 'prid_2011', 'single_shot', 'cam_b')
        self.split_path = osp.join(self.dataset_dir, 'splits_single_shot.json')

-        self.check_before_run()
+        required_files = [
+            self.dataset_dir,
+            self.cam_a_dir,
+            self.cam_b_dir
+        ]
+        self.check_before_run(required_files)

        self.prepare_split()
        splits = read_json(self.split_path)
@ -57,7 +61,6 @@ class PRID(BaseImageDataset):
        train, query, gallery = self.process_split(split)

        if verbose:
-            print('=> PRID loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -68,15 +71,6 @@ class PRID(BaseImageDataset):
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.cam_a_dir):
-            raise RuntimeError('"{}" is not available'.format(self.cam_a_dir))
-        if not osp.exists(self.cam_b_dir):
-            raise RuntimeError('"{}" is not available'.format(self.cam_b_dir))
-
    def prepare_split(self):
        if not osp.exists(self.split_path):
            print('Creating splits ...')
--- a/torchreid/datasets/prid2011.py
+++ b/torchreid/datasets/prid2011.py
@ -20,8 +20,7 @@ from .bases import BaseVideoDataset


 class PRID2011(BaseVideoDataset):
-    """
-    PRID2011
+    """PRID2011

    Reference:
    Hirzer et al. Person Re-Identification by Descriptive and Discriminative Classification. SCIA 2011.
@ -39,23 +38,27 @@ class PRID2011(BaseVideoDataset):
        super(PRID2011, self).__init__(root)
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json')
-        self.cam_a_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_a')
-        self.cam_b_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_b')
+        self.cam_a_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_a')
+        self.cam_b_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_b')
+
+        required_files = [
+            self.dataset_dir,
+            self.cam_a_dir,
+            self.cam_b_dir
+        ]
+        self.check_before_run(required_files)

-        self.check_before_run()
        splits = read_json(self.split_path)
        if split_id >=  len(splits):
            raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1))
        split = splits[split_id]
        train_dirs, test_dirs = split['train'], split['test']
-        print('# train identites: {}, # test identites {}'.format(len(train_dirs), len(test_dirs)))

        train = self.process_dir(train_dirs, cam1=True, cam2=True)
        query = self.process_dir(test_dirs, cam1=True, cam2=False)
        gallery = self.process_dir(test_dirs, cam1=False, cam2=True)

        if verbose:
-            print('=> PRID2011 loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -66,18 +69,13 @@ class PRID2011(BaseVideoDataset):
        self.num_query_pids, _, self.num_query_cams = self.get_videodata_info(self.query)
        self.num_gallery_pids, _, self.num_gallery_cams = self.get_videodata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-
    def process_dir(self, dirnames, cam1=True, cam2=True):
        tracklets = []
        dirname2pid = {dirname:i for i, dirname in enumerate(dirnames)}
        
        for dirname in dirnames:
            if cam1:
-                person_dir = osp.join(self.cam_a_path, dirname)
+                person_dir = osp.join(self.cam_a_dir, dirname)
                img_names = glob.glob(osp.join(person_dir, '*.png'))
                assert len(img_names) > 0
                img_names = tuple(img_names)
@ -85,7 +83,7 @@ class PRID2011(BaseVideoDataset):
                tracklets.append((img_names, pid, 0))

            if cam2:
-                person_dir = osp.join(self.cam_b_path, dirname)
+                person_dir = osp.join(self.cam_b_dir, dirname)
                img_names = glob.glob(osp.join(person_dir, '*.png'))
                assert len(img_names) > 0
                img_names = tuple(img_names)
--- a/torchreid/datasets/prid450s.py
+++ b/torchreid/datasets/prid450s.py
@ -20,8 +20,7 @@ from .bases import BaseImageDataset


 class PRID450S(BaseImageDataset):
-    """
-    PRID450S
+    """PRID450S

    Reference:
    Roth et al. Mahalanobis Distance Learning for Person Re-Identification. PR 2014.
@ -40,11 +39,17 @@ class PRID450S(BaseImageDataset):
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.dataset_url = 'https://files.icg.tugraz.at/f/8c709245bb/?raw=1'
        self.split_path = osp.join(self.dataset_dir, 'splits.json')
-        self.cam_a_path = osp.join(self.dataset_dir, 'cam_a')
-        self.cam_b_path = osp.join(self.dataset_dir, 'cam_b')
+        self.cam_a_dir = osp.join(self.dataset_dir, 'cam_a')
+        self.cam_b_dir = osp.join(self.dataset_dir, 'cam_b')

        self.download_data()
-        self.check_before_run()
+        
+        required_files = [
+            self.dataset_dir,
+            self.cam_a_dir,
+            self.cam_b_dir
+        ]
+        self.check_before_run(required_files)

        self.prepare_split()
        splits = read_json(self.split_path)
@ -61,7 +66,6 @@ class PRID450S(BaseImageDataset):
        gallery = [tuple(item) for item in gallery]

        if verbose:
-            print('=> PRID450S loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -72,18 +76,8 @@ class PRID450S(BaseImageDataset):
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.cam_a_path):
-            raise RuntimeError('"{}" is not available'.format(self.cam_a_path))
-        if not osp.exists(self.cam_b_path):
-            raise RuntimeError('"{}" is not available'.format(self.cam_b_path))
-
    def download_data(self):
        if osp.exists(self.dataset_dir):
-            print('This dataset has been downloaded.')
            return

        print('Creating directory {}'.format(self.dataset_dir))
@ -100,8 +94,8 @@ class PRID450S(BaseImageDataset):

    def prepare_split(self):
        if not osp.exists(self.split_path):
-            cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_path, 'img_*.png')))
-            cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_path, 'img_*.png')))
+            cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_dir, 'img_*.png')))
+            cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_dir, 'img_*.png')))
            assert len(cam_a_imgs) == len(cam_b_imgs)

            num_pids = len(cam_a_imgs)
@ -134,15 +128,16 @@ class PRID450S(BaseImageDataset):
                    else:
                        test.append((img_path, img_idx, 1))

-                split = {'train': train, 'query': test, 'gallery': test,
-                         'num_train_pids': num_train_pids,
-                         'num_query_pids': num_pids - num_train_pids,
-                         'num_gallery_pids': num_pids - num_train_pids,
-                         }
+                split = {
+                    'train': train,
+                    'query': test,
+                    'gallery': test,
+                    'num_train_pids': num_train_pids,
+                    'num_query_pids': num_pids - num_train_pids,
+                    'num_gallery_pids': num_pids - num_train_pids
+                }
                splits.append(split)

            print('Totally {} splits are created'.format(len(splits)))
            write_json(splits, self.split_path)
-            print('Split file saved to {}'.format(self.split_path))
-
-        print('Splits created')
+            print('Split file saved to {}'.format(self.split_path))
--- a/torchreid/datasets/sensereid.py
+++ b/torchreid/datasets/sensereid.py
@ -20,8 +20,7 @@ from .bases import BaseImageDataset


 class SenseReID(BaseImageDataset):
-    """
-    SenseReID
+    """SenseReID

    This dataset is used for test purpose only.

@ -44,13 +43,17 @@ class SenseReID(BaseImageDataset):
        self.query_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_probe')
        self.gallery_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_gallery')

-        self.check_before_run()
+        required_files = [
+            self.dataset_dir,
+            self.query_dir,
+            self.gallery_dir
+        ]
+        self.check_before_run(required_files)

        query = self.process_dir(self.query_dir)
        gallery = self.process_dir(self.gallery_dir)

        if verbose:
-            print('=> SenseReID loaded (test only)')
            self.print_dataset_statistics(query, query, gallery)

        self.train = copy.deepcopy(query) # only used to initialize trainloader
@ -61,15 +64,6 @@ class SenseReID(BaseImageDataset):
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.query_dir):
-            raise RuntimeError('"{}" is not available'.format(self.query_dir))
-        if not osp.exists(self.gallery_dir):
-            raise RuntimeError('"{}" is not available'.format(self.gallery_dir))
-
    def process_dir(self, dir_path):
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        dataset = []
--- a/torchreid/datasets/viper.py
+++ b/torchreid/datasets/viper.py
@ -19,8 +19,7 @@ from .bases import BaseImageDataset


 class VIPeR(BaseImageDataset):
-    """
-    VIPeR
+    """VIPeR

    Reference:
    Gray et al. Evaluating appearance models for recognition, reacquisition, and tracking. PETS 2007.
@ -38,12 +37,18 @@ class VIPeR(BaseImageDataset):
        super(VIPeR, self).__init__(root)
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.dataset_url = 'http://users.soe.ucsc.edu/~manduchi/VIPeR.v1.0.zip'
-        self.cam_a_path = osp.join(self.dataset_dir, 'VIPeR', 'cam_a')
-        self.cam_b_path = osp.join(self.dataset_dir, 'VIPeR', 'cam_b')
+        self.cam_a_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_a')
+        self.cam_b_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_b')
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        self.download_data()
-        self.check_before_run()
+        
+        required_files = [
+            self.dataset_dir,
+            self.cam_a_dir,
+            self.cam_b_dir
+        ]
+        self.check_before_run(required_files)
        
        self.prepare_split()
        splits = read_json(self.split_path)
@ -52,7 +57,7 @@ class VIPeR(BaseImageDataset):
        split = splits[split_id]

        train = split['train']
-        query = split['query'] # query and gallery share the same images
+        query = split['query'] # note: query and gallery share the same images
        gallery = split['gallery']

        train = [tuple(item) for item in train]
@ -60,7 +65,6 @@ class VIPeR(BaseImageDataset):
        gallery = [tuple(item) for item in gallery]

        if verbose:
-            print('=> VIPeR loaded')
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
@ -73,7 +77,6 @@ class VIPeR(BaseImageDataset):

    def download_data(self):
        if osp.exists(self.dataset_dir):
-            print('This dataset has been downloaded.')
            return

        print('Creating directory {}'.format(self.dataset_dir))
@ -88,21 +91,12 @@ class VIPeR(BaseImageDataset):
        zip_ref.extractall(self.dataset_dir)
        zip_ref.close()

-    def check_before_run(self):
-        """Check if all files are available before going deeper"""
-        if not osp.exists(self.dataset_dir):
-            raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
-        if not osp.exists(self.cam_a_path):
-            raise RuntimeError('"{}" is not available'.format(self.cam_a_path))
-        if not osp.exists(self.cam_b_path):
-            raise RuntimeError('"{}" is not available'.format(self.cam_b_path))
-
    def prepare_split(self):
        if not osp.exists(self.split_path):
            print('Creating 10 random splits of train ids and test ids')

-            cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_path, '*.bmp')))
-            cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_path, '*.bmp')))
+            cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_dir, '*.bmp')))
+            cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_dir, '*.bmp')))
            assert len(cam_a_imgs) == len(cam_b_imgs)
            num_pids = len(cam_a_imgs)
            print('Number of identities: {}'.format(num_pids))
@ -141,23 +135,27 @@ class VIPeR(BaseImageDataset):
                    test_b.append((cam_b_img, pid, 1))

                # use cameraA as query and cameraB as gallery
-                split = {'train': train, 'query': test_a, 'gallery': test_b,
-                         'num_train_pids': num_train_pids,
-                         'num_query_pids': num_pids - num_train_pids,
-                         'num_gallery_pids': num_pids - num_train_pids
-                         }
+                split = {
+                    'train': train,
+                    'query': test_a,
+                    'gallery': test_b,
+                    'num_train_pids': num_train_pids,
+                    'num_query_pids': num_pids - num_train_pids,
+                    'num_gallery_pids': num_pids - num_train_pids
+                }
                splits.append(split)

                # use cameraB as query and cameraA as gallery
-                split = {'train': train, 'query': test_b, 'gallery': test_a,
-                         'num_train_pids': num_train_pids,
-                         'num_query_pids': num_pids - num_train_pids,
-                         'num_gallery_pids': num_pids - num_train_pids
-                         }
+                split = {
+                    'train': train,
+                    'query': test_b,
+                    'gallery': test_a,
+                    'num_train_pids': num_train_pids,
+                    'num_query_pids': num_pids - num_train_pids,
+                    'num_gallery_pids': num_pids - num_train_pids
+                }
                splits.append(split)

            print('Totally {} splits are created'.format(len(splits)))
            write_json(splits, self.split_path)
-            print('Split file saved to {}'.format(self.split_path))
-
-        print('Splits created')
+            print('Split file saved to {}'.format(self.split_path))