diff --git a/torchreid/datasets/bases.py b/torchreid/datasets/bases.py index 136b780..9dfb63c 100644 --- a/torchreid/datasets/bases.py +++ b/torchreid/datasets/bases.py @@ -7,12 +7,17 @@ import numpy as np class BaseDataset(object): - """ - Base class of reid dataset - """ + """Base class of reid dataset""" + def __init__(self, root): self.root = osp.expanduser(root) + def check_before_run(self, required_files): + """Check if required files exist before going deeper""" + for f in required_files: + if not osp.exists(f): + raise RuntimeError('"{}" is not found'.format(f)) + def get_imagedata_info(self, data): pids, cams = [], [] for _, pid, camid in data: @@ -45,16 +50,14 @@ class BaseDataset(object): class BaseImageDataset(BaseDataset): - """ - Base class of image reid dataset - """ + """Base class of image-reid dataset""" def print_dataset_statistics(self, train, query, gallery): num_train_pids, num_train_imgs, num_train_cams = self.get_imagedata_info(train) num_query_pids, num_query_imgs, num_query_cams = self.get_imagedata_info(query) num_gallery_pids, num_gallery_imgs, num_gallery_cams = self.get_imagedata_info(gallery) - print('Image Dataset statistics:') + print('=> Loaded {}'.format(self.__class__.__name__)) print(' ----------------------------------------') print(' subset | # ids | # images | # cameras') print(' ----------------------------------------') @@ -65,9 +68,7 @@ class BaseImageDataset(BaseDataset): class BaseVideoDataset(BaseDataset): - """ - Base class of video reid dataset - """ + """Base class of video-reid dataset""" def print_dataset_statistics(self, train, query, gallery): num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \ @@ -84,7 +85,7 @@ class BaseVideoDataset(BaseDataset): max_num = np.max(tracklet_stats) avg_num = np.mean(tracklet_stats) - print('Video Dataset statistics:') + print('=> Loaded {}'.format(self.__class__.__name__)) print(' -------------------------------------------') print(' subset | # ids | # tracklets | # cameras') print(' -------------------------------------------') diff --git a/torchreid/datasets/cuhk01.py b/torchreid/datasets/cuhk01.py index 6c0d156..63f5635 100644 --- a/torchreid/datasets/cuhk01.py +++ b/torchreid/datasets/cuhk01.py @@ -20,8 +20,7 @@ from .bases import BaseImageDataset class CUHK01(BaseImageDataset): - """ - CUHK01 + """CUHK01 Reference: Li et al. Human Reidentification with Transferred Metric Learning. ACCV 2012. @@ -43,7 +42,12 @@ class CUHK01(BaseImageDataset): self.split_path = osp.join(self.dataset_dir, 'splits.json') self.extract_file() - self.check_before_run() + + required_files = [ + self.dataset_dir, + self.campus_dir + ] + self.check_before_run(required_files) self.prepare_split() splits = read_json(self.split_path) @@ -60,7 +64,6 @@ class CUHK01(BaseImageDataset): gallery = [tuple(item) for item in gallery] if verbose: - print('=> CUHK01 loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -77,14 +80,6 @@ class CUHK01(BaseImageDataset): zip_ref = zipfile.ZipFile(self.zip_path, 'r') zip_ref.extractall(self.dataset_dir) zip_ref.close() - print('Files extracted') - - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.campus_dir): - raise RuntimeError('"{}" is not available'.format(self.campus_dir)) def prepare_split(self): """ @@ -116,7 +111,6 @@ class CUHK01(BaseImageDataset): idx2label = {idx: label for label, idx in enumerate(train_idxs)} train, test_a, test_b = [], [], [] - for img_path, pid, camid in img_list: if pid in train_idxs: train.append((img_path, idx2label[pid], camid)) @@ -127,23 +121,27 @@ class CUHK01(BaseImageDataset): test_b.append((img_path, pid, camid)) # use cameraA as query and cameraB as gallery - split = {'train': train, 'query': test_a, 'gallery': test_b, - 'num_train_pids': num_train_pids, - 'num_query_pids': num_pids - num_train_pids, - 'num_gallery_pids': num_pids - num_train_pids, - } + split = { + 'train': train, + 'query': test_a, + 'gallery': test_b, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } splits.append(split) # use cameraB as query and cameraA as gallery - split = {'train': train, 'query': test_b, 'gallery': test_a, - 'num_train_pids': num_train_pids, - 'num_query_pids': num_pids - num_train_pids, - 'num_gallery_pids': num_pids - num_train_pids, - } + split = { + 'train': train, + 'query': test_b, + 'gallery': test_a, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } splits.append(split) print('Totally {} splits are created'.format(len(splits))) write_json(splits, self.split_path) - print('Split file saved to {}'.format(self.split_path)) - - print('Splits created') \ No newline at end of file + print('Split file saved to {}'.format(self.split_path)) \ No newline at end of file diff --git a/torchreid/datasets/cuhk03.py b/torchreid/datasets/cuhk03.py index 5aa48dd..71a0e3f 100644 --- a/torchreid/datasets/cuhk03.py +++ b/torchreid/datasets/cuhk03.py @@ -20,8 +20,7 @@ from .bases import BaseImageDataset class CUHK03(BaseImageDataset): - """ - CUHK03 + """CUHK03 Reference: Li et al. DeepReID: Deep Filter Pairing Neural Network for Person Re-identification. CVPR 2014. @@ -58,27 +57,31 @@ class CUHK03(BaseImageDataset): self.split_new_det_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat') self.split_new_lab_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat') - self.check_before_run() - self.preprocess() + required_files = [ + self.dataset_dir, + self.data_dir, + self.raw_mat_path, + self.split_new_det_mat_path, + self.split_new_lab_mat_path + ] + self.check_before_run(required_files) + + self.preprocess_split() if cuhk03_labeled: - image_type = 'labeled' split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path else: - image_type = 'detected' split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path splits = read_json(split_path) assert split_id < len(splits), 'Condition split_id ({}) < len(splits) ({}) is false'.format(split_id, len(splits)) split = splits[split_id] - print('Split index = {}'.format(split_id)) train = split['train'] query = split['query'] gallery = split['gallery'] if verbose: - print('=> CUHK03 ({}) loaded'.format(image_type)) self.print_dataset_statistics(train, query, gallery) self.train = train @@ -89,39 +92,25 @@ class CUHK03(BaseImageDataset): self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.data_dir): - raise RuntimeError('"{}" is not available'.format(self.data_dir)) - if not osp.exists(self.raw_mat_path): - raise RuntimeError('"{}" is not available'.format(self.raw_mat_path)) - if not osp.exists(self.split_new_det_mat_path): - raise RuntimeError('"{}" is not available'.format(self.split_new_det_mat_path)) - if not osp.exists(self.split_new_lab_mat_path): - raise RuntimeError('"{}" is not available'.format(self.split_new_lab_mat_path)) - - def preprocess(self): + def preprocess_split(self): """ This function is a bit complex and ugly, what it does is - 1. Extract data from cuhk-03.mat and save as png images. - 2. Create 20 classic splits. (Li et al. CVPR'14) - 3. Create new split. (Zhong et al. CVPR'17) + 1. extract data from cuhk-03.mat and save as png images + 2. create 20 classic splits (Li et al. CVPR'14) + 3. create new split (Zhong et al. CVPR'17) """ - print('Note: if root path is changed, the previously generated json files need to be re-generated (delete them first)') - if osp.exists(self.imgs_labeled_dir) and \ - osp.exists(self.imgs_detected_dir) and \ - osp.exists(self.split_classic_det_json_path) and \ - osp.exists(self.split_classic_lab_json_path) and \ - osp.exists(self.split_new_det_json_path) and \ - osp.exists(self.split_new_lab_json_path): - return + if osp.exists(self.imgs_labeled_dir) \ + and osp.exists(self.imgs_detected_dir) \ + and osp.exists(self.split_classic_det_json_path) \ + and osp.exists(self.split_classic_lab_json_path) \ + and osp.exists(self.split_new_det_json_path) \ + and osp.exists(self.split_new_lab_json_path): + return mkdir_if_missing(self.imgs_detected_dir) mkdir_if_missing(self.imgs_labeled_dir) - print('Extract image data from {} and save as png'.format(self.raw_mat_path)) + print('Extract image data from "{}" and save as png'.format(self.raw_mat_path)) mat = h5py.File(self.raw_mat_path, 'r') def _deref(ref): @@ -131,14 +120,14 @@ class CUHK03(BaseImageDataset): img_paths = [] # Note: some persons only have images for one view for imgid, img_ref in enumerate(img_refs): img = _deref(img_ref) - # skip empty cell - if img.size == 0 or img.ndim < 3: continue + if img.size==0 or img.ndim<3: + continue # skip empty cell # images are saved with the following format, index-1 (ensure uniqueness) # campid: index of camera pair (1-5) # pid: index of person in 'campid'-th camera pair # viewid: index of view, {1, 2} # imgid: index of image, (1-10) - viewid = 1 if imgid < 5 else 2 + viewid = 1 if imgid<5 else 2 img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid+1, pid+1, viewid, imgid+1) img_path = osp.join(save_dir, img_name) if not osp.isfile(img_path): @@ -146,11 +135,11 @@ class CUHK03(BaseImageDataset): img_paths.append(img_path) return img_paths - def _extract_img(name): - print('Processing {} images (extract and save) ...'.format(name)) + def _extract_img(image_type): + print('Processing {} images ...'.format(image_type)) meta_data = [] - imgs_dir = self.imgs_detected_dir if name == 'detected' else self.imgs_labeled_dir - for campid, camp_ref in enumerate(mat[name][0]): + imgs_dir = self.imgs_detected_dir if image_type=='detected' else self.imgs_labeled_dir + for campid, camp_ref in enumerate(mat[image_type][0]): camp = _deref(camp_ref) num_pids = camp.shape[0] for pid in range(num_pids): @@ -218,7 +207,8 @@ class CUHK03(BaseImageDataset): img_name = filelist[idx][0] camid = int(img_name.split('_')[2]) - 1 # make it 0-based pid = pids[idx] - if relabel: pid = pid2label[pid] + if relabel: + pid = pid2label[pid] img_path = osp.join(img_dir, img_name) tmp_set.append((img_path, int(pid), camid)) unique_pids.add(pid) @@ -237,28 +227,38 @@ class CUHK03(BaseImageDataset): gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False) return train_info, query_info, gallery_info - print('Creating new splits for detected images (767/700) ...') + print('Creating new split for detected images (767/700) ...') train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_det_mat_path), self.imgs_detected_dir, ) - splits = [{ - 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], - 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], - 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], - 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2], + split = [{ + 'train': train_info[0], + 'query': query_info[0], + 'gallery': gallery_info[0], + 'num_train_pids': train_info[1], + 'num_train_imgs': train_info[2], + 'num_query_pids': query_info[1], + 'num_query_imgs': query_info[2], + 'num_gallery_pids': gallery_info[1], + 'num_gallery_imgs': gallery_info[2], }] - write_json(splits, self.split_new_det_json_path) + write_json(split, self.split_new_det_json_path) - print('Creating new splits for labeled images (767/700) ...') + print('Creating new split for labeled images (767/700) ...') train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_lab_mat_path), self.imgs_labeled_dir, ) - splits = [{ - 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], - 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], - 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], - 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2], + split = [{ + 'train': train_info[0], + 'query': query_info[0], + 'gallery': gallery_info[0], + 'num_train_pids': train_info[1], + 'num_train_imgs': train_info[2], + 'num_query_pids': query_info[1], + 'num_query_imgs': query_info[2], + 'num_gallery_pids': gallery_info[1], + 'num_gallery_imgs': gallery_info[2], }] - write_json(splits, self.split_new_lab_json_path) + write_json(split, self.split_new_lab_json_path) diff --git a/torchreid/datasets/dukemtmcreid.py b/torchreid/datasets/dukemtmcreid.py index 44753fe..5ed3aec 100644 --- a/torchreid/datasets/dukemtmcreid.py +++ b/torchreid/datasets/dukemtmcreid.py @@ -20,8 +20,7 @@ from .bases import BaseImageDataset class DukeMTMCreID(BaseImageDataset): - """ - DukeMTMC-reID + """DukeMTMC-reID Reference: 1. Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016. @@ -45,14 +44,20 @@ class DukeMTMCreID(BaseImageDataset): self.gallery_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/bounding_box_test') self.download_data() - self.check_before_run() + + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) train = self.process_dir(self.train_dir, relabel=True) query = self.process_dir(self.query_dir, relabel=False) gallery = self.process_dir(self.gallery_dir, relabel=False) if verbose: - print('=> DukeMTMC-reID loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -65,7 +70,6 @@ class DukeMTMCreID(BaseImageDataset): def download_data(self): if osp.exists(self.dataset_dir): - print('This dataset has been downloaded.') return print('Creating directory {}'.format(self.dataset_dir)) @@ -80,17 +84,6 @@ class DukeMTMCreID(BaseImageDataset): zip_ref.extractall(self.dataset_dir) zip_ref.close() - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.train_dir): - raise RuntimeError('"{}" is not available'.format(self.train_dir)) - if not osp.exists(self.query_dir): - raise RuntimeError('"{}" is not available'.format(self.query_dir)) - if not osp.exists(self.gallery_dir): - raise RuntimeError('"{}" is not available'.format(self.gallery_dir)) - def process_dir(self, dir_path, relabel=False): img_paths = glob.glob(osp.join(dir_path, '*.jpg')) pattern = re.compile(r'([-\d]+)_c(\d)') diff --git a/torchreid/datasets/dukemtmcvidreid.py b/torchreid/datasets/dukemtmcvidreid.py index f165670..6ca43de 100644 --- a/torchreid/datasets/dukemtmcvidreid.py +++ b/torchreid/datasets/dukemtmcvidreid.py @@ -20,8 +20,7 @@ from .bases import BaseVideoDataset class DukeMTMCVidReID(BaseVideoDataset): - """ - DukeMTMCVidReID + """DukeMTMCVidReID Reference: Wu et al. Exploit the Unknown Gradually: One-Shot Video-Based Person @@ -45,18 +44,23 @@ class DukeMTMCVidReID(BaseVideoDataset): self.split_train_json_path = osp.join(self.dataset_dir, 'split_train.json') self.split_query_json_path = osp.join(self.dataset_dir, 'split_query.json') self.split_gallery_json_path = osp.join(self.dataset_dir, 'split_gallery.json') - self.min_seq_len = min_seq_len + self.download_data() - self.check_before_run() - print('Note: if root path is changed, the previously generated json files need to be re-generated (so delete them first)') + + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) train = self.process_dir(self.train_dir, self.split_train_json_path, relabel=True) query = self.process_dir(self.query_dir, self.split_query_json_path, relabel=False) gallery = self.process_dir(self.gallery_dir, self.split_gallery_json_path, relabel=False) if verbose: - print('=> DukeMTMC-VideoReID loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -69,7 +73,6 @@ class DukeMTMCVidReID(BaseVideoDataset): def download_data(self): if osp.exists(self.dataset_dir): - print('This dataset has been downloaded.') return print('Creating directory {}'.format(self.dataset_dir)) @@ -84,24 +87,12 @@ class DukeMTMCVidReID(BaseVideoDataset): zip_ref.extractall(self.dataset_dir) zip_ref.close() - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.train_dir): - raise RuntimeError('"{}" is not available'.format(self.train_dir)) - if not osp.exists(self.query_dir): - raise RuntimeError('"{}" is not available'.format(self.query_dir)) - if not osp.exists(self.gallery_dir): - raise RuntimeError('"{}" is not available'.format(self.gallery_dir)) - def process_dir(self, dir_path, json_path, relabel): if osp.exists(json_path): - print('=> {} generated before, awesome!'.format(json_path)) split = read_json(json_path) return split['tracklets'] - print('=> Automatically generating split (might take a while for the first time, have a coffe)') + print('=> Generating split json file (** this might take a while **)') pdirs = glob.glob(osp.join(dir_path, '*')) # avoid .DS_Store print('Processing "{}" with {} person identities'.format(dir_path, len(pdirs))) @@ -114,7 +105,8 @@ class DukeMTMCVidReID(BaseVideoDataset): tracklets = [] for pdir in pdirs: pid = int(osp.basename(pdir)) - if relabel: pid = pid2label[pid] + if relabel: + pid = pid2label[pid] tdirs = glob.glob(osp.join(pdir, '*')) for tdir in tdirs: raw_img_paths = glob.glob(osp.join(tdir, '*.jpg')) diff --git a/torchreid/datasets/grid.py b/torchreid/datasets/grid.py index 3e7fe7c..68cd458 100644 --- a/torchreid/datasets/grid.py +++ b/torchreid/datasets/grid.py @@ -20,8 +20,7 @@ from .bases import BaseImageDataset class GRID(BaseImageDataset): - """ - GRID + """GRID Reference: Loy et al. Multi-camera activity correlation analysis. CVPR 2009. @@ -45,7 +44,14 @@ class GRID(BaseImageDataset): self.split_path = osp.join(self.dataset_dir, 'splits.json') self.download_data() - self.check_before_run() + + required_files = [ + self.dataset_dir, + self.probe_path, + self.gallery_path, + self.split_mat_path + ] + self.check_before_run(required_files) self.prepare_split() splits = read_json(self.split_path) @@ -62,7 +68,6 @@ class GRID(BaseImageDataset): gallery = [tuple(item) for item in gallery] if verbose: - print('=> GRID loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -73,20 +78,8 @@ class GRID(BaseImageDataset): self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.probe_path): - raise RuntimeError('"{}" is not available'.format(self.probe_path)) - if not osp.exists(self.gallery_path): - raise RuntimeError('"{}" is not available'.format(self.gallery_path)) - if not osp.exists(self.split_mat_path): - raise RuntimeError('"{}" is not available'.format(self.split_mat_path)) - def download_data(self): if osp.exists(self.dataset_dir): - print('This dataset has been downloaded.') return print('Creating directory {}'.format(self.dataset_dir)) @@ -123,10 +116,8 @@ class GRID(BaseImageDataset): img_idx = int(img_name.split('_')[0]) camid = int(img_name.split('_')[1]) - 1 # index starts from 0 if img_idx in train_idxs: - # add to train data train.append((img_path, idx2label[img_idx], camid)) else: - # add to query data query.append((img_path, img_idx, camid)) # process gallery folder @@ -135,21 +126,20 @@ class GRID(BaseImageDataset): img_idx = int(img_name.split('_')[0]) camid = int(img_name.split('_')[1]) - 1 # index starts from 0 if img_idx in train_idxs: - # add to train data train.append((img_path, idx2label[img_idx], camid)) else: - # add to gallery data gallery.append((img_path, img_idx, camid)) - split = {'train': train, 'query': query, 'gallery': gallery, - 'num_train_pids': 125, - 'num_query_pids': 125, - 'num_gallery_pids': 900, - } + split = { + 'train': train, + 'query': query, + 'gallery': gallery, + 'num_train_pids': 125, + 'num_query_pids': 125, + 'num_gallery_pids': 900 + } splits.append(split) print('Totally {} splits are created'.format(len(splits))) write_json(splits, self.split_path) - print('Split file saved to {}'.format(self.split_path)) - - print('Splits created') \ No newline at end of file + print('Split file saved to {}'.format(self.split_path)) \ No newline at end of file diff --git a/torchreid/datasets/ilids.py b/torchreid/datasets/ilids.py index 4ca7328..21f67f6 100644 --- a/torchreid/datasets/ilids.py +++ b/torchreid/datasets/ilids.py @@ -23,8 +23,7 @@ from .bases import BaseImageDataset class iLIDS(BaseImageDataset): - """ - QMUL-iLIDS + """QMUL-iLIDS Reference: Zheng et al. Associating Groups of People. BMVC 2009. @@ -44,7 +43,12 @@ class iLIDS(BaseImageDataset): self.split_path = osp.join(self.dataset_dir, 'splits.json') self.download_data() - self.check_before_run() + + required_files = [ + self.dataset_dir, + self.data_dir + ] + self.check_before_run(required_files) self.prepare_split() splits = read_json(self.split_path) @@ -55,7 +59,6 @@ class iLIDS(BaseImageDataset): train, query, gallery = self.process_split(split) if verbose: - print('=> iLIDS loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -68,7 +71,6 @@ class iLIDS(BaseImageDataset): def download_data(self): if osp.exists(self.dataset_dir): - print('This dataset has been downloaded.') return mkdir_if_missing(self.dataset_dir) @@ -82,18 +84,10 @@ class iLIDS(BaseImageDataset): tar.extractall(path=self.dataset_dir) tar.close() - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.data_dir): - raise RuntimeError('"{}" is not available'.format(self.data_dir)) - def prepare_split(self): if not osp.exists(self.split_path): print('Creating splits ...') - # read image paths paths = glob.glob(osp.join(self.data_dir, '*.jpg')) img_names = [osp.basename(path) for path in paths] num_imgs = len(img_names) @@ -122,7 +116,6 @@ class iLIDS(BaseImageDataset): train_pids = pids_copy[:num_train_pids] test_pids = pids_copy[num_train_pids:] - # store image names train = [] query = [] gallery = [] diff --git a/torchreid/datasets/ilidsvid.py b/torchreid/datasets/ilidsvid.py index bcbca5f..bcd6197 100644 --- a/torchreid/datasets/ilidsvid.py +++ b/torchreid/datasets/ilidsvid.py @@ -20,8 +20,7 @@ from .bases import BaseVideoDataset class iLIDSVID(BaseVideoDataset): - """ - iLIDS-VID + """iLIDS-VID Reference: Wang et al. Person Re-Identification by Video Ranking. ECCV 2014. @@ -47,7 +46,13 @@ class iLIDSVID(BaseVideoDataset): self.cam_2_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam2') self.download_data() - self.check_before_run() + + required_files = [ + self.dataset_dir, + self.data_dir, + self.split_dir + ] + self.check_before_run(required_files) self.prepare_split() splits = read_json(self.split_path) @@ -55,14 +60,12 @@ class iLIDSVID(BaseVideoDataset): raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1)) split = splits[split_id] train_dirs, test_dirs = split['train'], split['test'] - print('# train identites: {}, # test identites {}'.format(len(train_dirs), len(test_dirs))) train = self.process_data(train_dirs, cam1=True, cam2=True) query = self.process_data(test_dirs, cam1=True, cam2=False) gallery = self.process_data(test_dirs, cam1=False, cam2=True) if verbose: - print('=> iLIDS-VID loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -75,7 +78,6 @@ class iLIDSVID(BaseVideoDataset): def download_data(self): if osp.exists(self.dataset_dir): - print('This dataset has been downloaded.') return mkdir_if_missing(self.dataset_dir) @@ -89,15 +91,6 @@ class iLIDSVID(BaseVideoDataset): tar.extractall(path=self.dataset_dir) tar.close() - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.data_dir): - raise RuntimeError('"{}" is not available'.format(self.data_dir)) - if not osp.exists(self.split_dir): - raise RuntimeError('"{}" is not available'.format(self.split_dir)) - def prepare_split(self): if not osp.exists(self.split_path): print('Creating splits ...') @@ -140,8 +133,6 @@ class iLIDSVID(BaseVideoDataset): print('Split file is saved to {}'.format(self.split_path)) write_json(splits, self.split_path) - print('Splits created') - def process_data(self, dirnames, cam1=True, cam2=True): tracklets = [] dirname2pid = {dirname:i for i, dirname in enumerate(dirnames)} diff --git a/torchreid/datasets/market1501.py b/torchreid/datasets/market1501.py index ed68835..25bfb72 100644 --- a/torchreid/datasets/market1501.py +++ b/torchreid/datasets/market1501.py @@ -19,8 +19,7 @@ from .bases import BaseImageDataset class Market1501(BaseImageDataset): - """ - Market1501 + """Market1501 Reference: Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015. @@ -42,7 +41,15 @@ class Market1501(BaseImageDataset): self.extra_gallery_dir = osp.join(self.dataset_dir, 'images') self.market1501_500k = market1501_500k - self.check_before_run() + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + if self.market1501_500k: + required_files.append(self.extra_gallery_dir) + self.check_before_run(required_files) train = self.process_dir(self.train_dir, relabel=True) query = self.process_dir(self.query_dir, relabel=False) @@ -51,7 +58,6 @@ class Market1501(BaseImageDataset): gallery += self.process_dir(self.extra_gallery_dir, relabel=False) if verbose: - print('=> Market1501 loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -62,19 +68,6 @@ class Market1501(BaseImageDataset): self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.train_dir): - raise RuntimeError('"{}" is not available'.format(self.train_dir)) - if not osp.exists(self.query_dir): - raise RuntimeError('"{}" is not available'.format(self.query_dir)) - if not osp.exists(self.gallery_dir): - raise RuntimeError('"{}" is not available'.format(self.gallery_dir)) - if self.market1501_500k and not osp.exists(self.extra_gallery_dir): - raise RuntimeError('"{}" is not available'.format(self.extra_gallery_dir)) - def process_dir(self, dir_path, relabel=False): img_paths = glob.glob(osp.join(dir_path, '*.jpg')) pattern = re.compile(r'([-\d]+)_c(\d)') @@ -82,18 +75,21 @@ class Market1501(BaseImageDataset): pid_container = set() for img_path in img_paths: pid, _ = map(int, pattern.search(img_path).groups()) - if pid == -1: continue # junk images are just ignored + if pid == -1: + continue # junk images are just ignored pid_container.add(pid) pid2label = {pid:label for label, pid in enumerate(pid_container)} dataset = [] for img_path in img_paths: pid, camid = map(int, pattern.search(img_path).groups()) - if pid == -1: continue # junk images are just ignored + if pid == -1: + continue # junk images are just ignored assert 0 <= pid <= 1501 # pid == 0 means background assert 1 <= camid <= 6 camid -= 1 # index starts from 0 - if relabel: pid = pid2label[pid] + if relabel: + pid = pid2label[pid] dataset.append((img_path, pid, camid)) return dataset \ No newline at end of file diff --git a/torchreid/datasets/mars.py b/torchreid/datasets/mars.py index 446a60e..1af5d25 100644 --- a/torchreid/datasets/mars.py +++ b/torchreid/datasets/mars.py @@ -19,8 +19,7 @@ from .bases import BaseVideoDataset class Mars(BaseVideoDataset): - """ - MARS + """MARS Reference: Zheng et al. MARS: A Video Benchmark for Large-Scale Person Re-identification. ECCV 2016. @@ -43,9 +42,16 @@ class Mars(BaseVideoDataset): self.track_test_info_path = osp.join(self.dataset_dir, 'info/tracks_test_info.mat') self.query_IDX_path = osp.join(self.dataset_dir, 'info/query_IDX.mat') - self.check_before_run() + required_files = [ + self.dataset_dir, + self.train_name_path, + self.test_name_path, + self.track_train_info_path, + self.track_test_info_path, + self.query_IDX_path + ] + self.check_before_run(required_files) - # prepare meta data train_names = self.get_names(self.train_name_path) test_names = self.get_names(self.test_name_path) track_train = loadmat(self.track_train_info_path)['track_train_info'] # numpy.ndarray (8298, 4) @@ -61,7 +67,6 @@ class Mars(BaseVideoDataset): gallery = self.process_data(test_names, track_gallery, home_dir='bbox_test', relabel=False, min_seq_len=min_seq_len) if verbose: - print('=> MARS loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -72,21 +77,6 @@ class Mars(BaseVideoDataset): self.num_query_pids, _, self.num_query_cams = self.get_videodata_info(self.query) self.num_gallery_pids, _, self.num_gallery_cams = self.get_videodata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.train_name_path): - raise RuntimeError('"{}" is not available'.format(self.train_name_path)) - if not osp.exists(self.test_name_path): - raise RuntimeError('"{}" is not available'.format(self.test_name_path)) - if not osp.exists(self.track_train_info_path): - raise RuntimeError('"{}" is not available'.format(self.track_train_info_path)) - if not osp.exists(self.track_test_info_path): - raise RuntimeError('"{}" is not available'.format(self.track_test_info_path)) - if not osp.exists(self.query_IDX_path): - raise RuntimeError('"{}" is not available'.format(self.query_IDX_path)) - def get_names(self, fpath): names = [] with open(fpath, 'r') as f: @@ -107,7 +97,8 @@ class Mars(BaseVideoDataset): for tracklet_idx in range(num_tracklets): data = meta_data[tracklet_idx,...] start_index, end_index, pid, camid = data - if pid == -1: continue # junk images are just ignored + if pid == -1: + continue # junk images are just ignored assert 1 <= camid <= 6 if relabel: pid = pid2label[pid] camid -= 1 # index starts from 0 diff --git a/torchreid/datasets/msmt17.py b/torchreid/datasets/msmt17.py index a6b6354..6007376 100644 --- a/torchreid/datasets/msmt17.py +++ b/torchreid/datasets/msmt17.py @@ -21,23 +21,22 @@ from .bases import BaseImageDataset # To adapt to different versions # Log: # 22.01.2019: v1 and v2 only differ in dir names -_TRAIN_DIR_KEY = 'train_dir' -_TEST_DIR_KEY = 'test_dir' -_VERSION = { +TRAIN_DIR_KEY = 'train_dir' +TEST_DIR_KEY = 'test_dir' +VERSION_DICT = { 'MSMT17_V1': { - _TRAIN_DIR_KEY: 'train', - _TEST_DIR_KEY: 'test', + TRAIN_DIR_KEY: 'train', + TEST_DIR_KEY: 'test', }, 'MSMT17_V2': { - _TRAIN_DIR_KEY: 'mask_train_v2', - _TEST_DIR_KEY: 'mask_test_v2', + TRAIN_DIR_KEY: 'mask_train_v2', + TEST_DIR_KEY: 'mask_test_v2', } } class MSMT17(BaseImageDataset): - """ - MSMT17 + """MSMT17 Reference: Wei et al. Person Transfer GAN to Bridge Domain Gap for Person Re-Identification. CVPR 2018. @@ -55,10 +54,10 @@ class MSMT17(BaseImageDataset): super(MSMT17, self).__init__(root) self.dataset_dir = osp.join(self.root, self.dataset_dir) has_main_dir = False - for main_dir in _VERSION: + for main_dir in VERSION_DICT: if osp.exists(osp.join(self.dataset_dir, main_dir)): - train_dir = _VERSION[main_dir][_TRAIN_DIR_KEY] - test_dir = _VERSION[main_dir][_TEST_DIR_KEY] + train_dir = VERSION_DICT[main_dir][TRAIN_DIR_KEY] + test_dir = VERSION_DICT[main_dir][TEST_DIR_KEY] has_main_dir = True break assert has_main_dir, 'Dataset folder not found' @@ -69,9 +68,15 @@ class MSMT17(BaseImageDataset): self.list_query_path = osp.join(self.dataset_dir, main_dir, 'list_query.txt') self.list_gallery_path = osp.join(self.dataset_dir, main_dir, 'list_gallery.txt') - self.check_before_run() + required_files = [ + self.dataset_dir, + self.train_dir, + self.test_dir + ] + self.check_before_run(required_files) + train = self.process_dir(self.train_dir, self.list_train_path) - #val = self.process_dir(self.train_dir, self.list_val_path) + val = self.process_dir(self.train_dir, self.list_val_path) query = self.process_dir(self.test_dir, self.list_query_path) gallery = self.process_dir(self.test_dir, self.list_gallery_path) @@ -80,7 +85,6 @@ class MSMT17(BaseImageDataset): #num_train_imgs += num_val_imgs if verbose: - print('=> MSMT17 loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -91,15 +95,6 @@ class MSMT17(BaseImageDataset): self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.train_dir): - raise RuntimeError('"{}" is not available'.format(self.train_dir)) - if not osp.exists(self.test_dir): - raise RuntimeError('"{}" is not available'.format(self.test_dir)) - def process_dir(self, dir_path, list_path): with open(list_path, 'r') as txt: lines = txt.readlines() @@ -113,7 +108,7 @@ class MSMT17(BaseImageDataset): dataset.append((img_path, pid, camid)) pid_container.add(pid) num_pids = len(pid_container) - # check if pid starts from 0 and increments with 1 for idx, pid in enumerate(pid_container): - assert idx == pid, 'See code comment for explanation' + if idx != pid: + raise RuntimeError('pid does not start from 0 and increment by 1') return dataset \ No newline at end of file diff --git a/torchreid/datasets/prid.py b/torchreid/datasets/prid.py index 87d4e23..151873c 100644 --- a/torchreid/datasets/prid.py +++ b/torchreid/datasets/prid.py @@ -23,8 +23,7 @@ from .bases import BaseImageDataset class PRID(BaseImageDataset): - """ - PRID (single-shot version of prid-2011) + """PRID (single-shot version of prid-2011) Reference: Hirzer et al. Person Re-Identification by Descriptive and Discriminative Classification. SCIA 2011. @@ -46,7 +45,12 @@ class PRID(BaseImageDataset): self.cam_b_dir = osp.join(self.dataset_dir, 'prid_2011', 'single_shot', 'cam_b') self.split_path = osp.join(self.dataset_dir, 'splits_single_shot.json') - self.check_before_run() + required_files = [ + self.dataset_dir, + self.cam_a_dir, + self.cam_b_dir + ] + self.check_before_run(required_files) self.prepare_split() splits = read_json(self.split_path) @@ -57,7 +61,6 @@ class PRID(BaseImageDataset): train, query, gallery = self.process_split(split) if verbose: - print('=> PRID loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -68,15 +71,6 @@ class PRID(BaseImageDataset): self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.cam_a_dir): - raise RuntimeError('"{}" is not available'.format(self.cam_a_dir)) - if not osp.exists(self.cam_b_dir): - raise RuntimeError('"{}" is not available'.format(self.cam_b_dir)) - def prepare_split(self): if not osp.exists(self.split_path): print('Creating splits ...') diff --git a/torchreid/datasets/prid2011.py b/torchreid/datasets/prid2011.py index 9acc718..d5b3050 100644 --- a/torchreid/datasets/prid2011.py +++ b/torchreid/datasets/prid2011.py @@ -20,8 +20,7 @@ from .bases import BaseVideoDataset class PRID2011(BaseVideoDataset): - """ - PRID2011 + """PRID2011 Reference: Hirzer et al. Person Re-Identification by Descriptive and Discriminative Classification. SCIA 2011. @@ -39,23 +38,27 @@ class PRID2011(BaseVideoDataset): super(PRID2011, self).__init__(root) self.dataset_dir = osp.join(self.root, self.dataset_dir) self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json') - self.cam_a_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_a') - self.cam_b_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_b') + self.cam_a_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_a') + self.cam_b_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_b') + + required_files = [ + self.dataset_dir, + self.cam_a_dir, + self.cam_b_dir + ] + self.check_before_run(required_files) - self.check_before_run() splits = read_json(self.split_path) if split_id >= len(splits): raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1)) split = splits[split_id] train_dirs, test_dirs = split['train'], split['test'] - print('# train identites: {}, # test identites {}'.format(len(train_dirs), len(test_dirs))) train = self.process_dir(train_dirs, cam1=True, cam2=True) query = self.process_dir(test_dirs, cam1=True, cam2=False) gallery = self.process_dir(test_dirs, cam1=False, cam2=True) if verbose: - print('=> PRID2011 loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -66,18 +69,13 @@ class PRID2011(BaseVideoDataset): self.num_query_pids, _, self.num_query_cams = self.get_videodata_info(self.query) self.num_gallery_pids, _, self.num_gallery_cams = self.get_videodata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - def process_dir(self, dirnames, cam1=True, cam2=True): tracklets = [] dirname2pid = {dirname:i for i, dirname in enumerate(dirnames)} for dirname in dirnames: if cam1: - person_dir = osp.join(self.cam_a_path, dirname) + person_dir = osp.join(self.cam_a_dir, dirname) img_names = glob.glob(osp.join(person_dir, '*.png')) assert len(img_names) > 0 img_names = tuple(img_names) @@ -85,7 +83,7 @@ class PRID2011(BaseVideoDataset): tracklets.append((img_names, pid, 0)) if cam2: - person_dir = osp.join(self.cam_b_path, dirname) + person_dir = osp.join(self.cam_b_dir, dirname) img_names = glob.glob(osp.join(person_dir, '*.png')) assert len(img_names) > 0 img_names = tuple(img_names) diff --git a/torchreid/datasets/prid450s.py b/torchreid/datasets/prid450s.py index 6d0258a..437a3aa 100644 --- a/torchreid/datasets/prid450s.py +++ b/torchreid/datasets/prid450s.py @@ -20,8 +20,7 @@ from .bases import BaseImageDataset class PRID450S(BaseImageDataset): - """ - PRID450S + """PRID450S Reference: Roth et al. Mahalanobis Distance Learning for Person Re-Identification. PR 2014. @@ -40,11 +39,17 @@ class PRID450S(BaseImageDataset): self.dataset_dir = osp.join(self.root, self.dataset_dir) self.dataset_url = 'https://files.icg.tugraz.at/f/8c709245bb/?raw=1' self.split_path = osp.join(self.dataset_dir, 'splits.json') - self.cam_a_path = osp.join(self.dataset_dir, 'cam_a') - self.cam_b_path = osp.join(self.dataset_dir, 'cam_b') + self.cam_a_dir = osp.join(self.dataset_dir, 'cam_a') + self.cam_b_dir = osp.join(self.dataset_dir, 'cam_b') self.download_data() - self.check_before_run() + + required_files = [ + self.dataset_dir, + self.cam_a_dir, + self.cam_b_dir + ] + self.check_before_run(required_files) self.prepare_split() splits = read_json(self.split_path) @@ -61,7 +66,6 @@ class PRID450S(BaseImageDataset): gallery = [tuple(item) for item in gallery] if verbose: - print('=> PRID450S loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -72,18 +76,8 @@ class PRID450S(BaseImageDataset): self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.cam_a_path): - raise RuntimeError('"{}" is not available'.format(self.cam_a_path)) - if not osp.exists(self.cam_b_path): - raise RuntimeError('"{}" is not available'.format(self.cam_b_path)) - def download_data(self): if osp.exists(self.dataset_dir): - print('This dataset has been downloaded.') return print('Creating directory {}'.format(self.dataset_dir)) @@ -100,8 +94,8 @@ class PRID450S(BaseImageDataset): def prepare_split(self): if not osp.exists(self.split_path): - cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_path, 'img_*.png'))) - cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_path, 'img_*.png'))) + cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_dir, 'img_*.png'))) + cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_dir, 'img_*.png'))) assert len(cam_a_imgs) == len(cam_b_imgs) num_pids = len(cam_a_imgs) @@ -134,15 +128,16 @@ class PRID450S(BaseImageDataset): else: test.append((img_path, img_idx, 1)) - split = {'train': train, 'query': test, 'gallery': test, - 'num_train_pids': num_train_pids, - 'num_query_pids': num_pids - num_train_pids, - 'num_gallery_pids': num_pids - num_train_pids, - } + split = { + 'train': train, + 'query': test, + 'gallery': test, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } splits.append(split) print('Totally {} splits are created'.format(len(splits))) write_json(splits, self.split_path) - print('Split file saved to {}'.format(self.split_path)) - - print('Splits created') \ No newline at end of file + print('Split file saved to {}'.format(self.split_path)) \ No newline at end of file diff --git a/torchreid/datasets/sensereid.py b/torchreid/datasets/sensereid.py index 0e5b8c8..337d3c0 100644 --- a/torchreid/datasets/sensereid.py +++ b/torchreid/datasets/sensereid.py @@ -20,8 +20,7 @@ from .bases import BaseImageDataset class SenseReID(BaseImageDataset): - """ - SenseReID + """SenseReID This dataset is used for test purpose only. @@ -44,13 +43,17 @@ class SenseReID(BaseImageDataset): self.query_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_probe') self.gallery_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_gallery') - self.check_before_run() + required_files = [ + self.dataset_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) query = self.process_dir(self.query_dir) gallery = self.process_dir(self.gallery_dir) if verbose: - print('=> SenseReID loaded (test only)') self.print_dataset_statistics(query, query, gallery) self.train = copy.deepcopy(query) # only used to initialize trainloader @@ -61,15 +64,6 @@ class SenseReID(BaseImageDataset): self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.query_dir): - raise RuntimeError('"{}" is not available'.format(self.query_dir)) - if not osp.exists(self.gallery_dir): - raise RuntimeError('"{}" is not available'.format(self.gallery_dir)) - def process_dir(self, dir_path): img_paths = glob.glob(osp.join(dir_path, '*.jpg')) dataset = [] diff --git a/torchreid/datasets/viper.py b/torchreid/datasets/viper.py index 32e98f4..05a777a 100755 --- a/torchreid/datasets/viper.py +++ b/torchreid/datasets/viper.py @@ -19,8 +19,7 @@ from .bases import BaseImageDataset class VIPeR(BaseImageDataset): - """ - VIPeR + """VIPeR Reference: Gray et al. Evaluating appearance models for recognition, reacquisition, and tracking. PETS 2007. @@ -38,12 +37,18 @@ class VIPeR(BaseImageDataset): super(VIPeR, self).__init__(root) self.dataset_dir = osp.join(self.root, self.dataset_dir) self.dataset_url = 'http://users.soe.ucsc.edu/~manduchi/VIPeR.v1.0.zip' - self.cam_a_path = osp.join(self.dataset_dir, 'VIPeR', 'cam_a') - self.cam_b_path = osp.join(self.dataset_dir, 'VIPeR', 'cam_b') + self.cam_a_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_a') + self.cam_b_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_b') self.split_path = osp.join(self.dataset_dir, 'splits.json') self.download_data() - self.check_before_run() + + required_files = [ + self.dataset_dir, + self.cam_a_dir, + self.cam_b_dir + ] + self.check_before_run(required_files) self.prepare_split() splits = read_json(self.split_path) @@ -52,7 +57,7 @@ class VIPeR(BaseImageDataset): split = splits[split_id] train = split['train'] - query = split['query'] # query and gallery share the same images + query = split['query'] # note: query and gallery share the same images gallery = split['gallery'] train = [tuple(item) for item in train] @@ -60,7 +65,6 @@ class VIPeR(BaseImageDataset): gallery = [tuple(item) for item in gallery] if verbose: - print('=> VIPeR loaded') self.print_dataset_statistics(train, query, gallery) self.train = train @@ -73,7 +77,6 @@ class VIPeR(BaseImageDataset): def download_data(self): if osp.exists(self.dataset_dir): - print('This dataset has been downloaded.') return print('Creating directory {}'.format(self.dataset_dir)) @@ -88,21 +91,12 @@ class VIPeR(BaseImageDataset): zip_ref.extractall(self.dataset_dir) zip_ref.close() - def check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError('"{}" is not available'.format(self.dataset_dir)) - if not osp.exists(self.cam_a_path): - raise RuntimeError('"{}" is not available'.format(self.cam_a_path)) - if not osp.exists(self.cam_b_path): - raise RuntimeError('"{}" is not available'.format(self.cam_b_path)) - def prepare_split(self): if not osp.exists(self.split_path): print('Creating 10 random splits of train ids and test ids') - cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_path, '*.bmp'))) - cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_path, '*.bmp'))) + cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_dir, '*.bmp'))) + cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_dir, '*.bmp'))) assert len(cam_a_imgs) == len(cam_b_imgs) num_pids = len(cam_a_imgs) print('Number of identities: {}'.format(num_pids)) @@ -141,23 +135,27 @@ class VIPeR(BaseImageDataset): test_b.append((cam_b_img, pid, 1)) # use cameraA as query and cameraB as gallery - split = {'train': train, 'query': test_a, 'gallery': test_b, - 'num_train_pids': num_train_pids, - 'num_query_pids': num_pids - num_train_pids, - 'num_gallery_pids': num_pids - num_train_pids - } + split = { + 'train': train, + 'query': test_a, + 'gallery': test_b, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } splits.append(split) # use cameraB as query and cameraA as gallery - split = {'train': train, 'query': test_b, 'gallery': test_a, - 'num_train_pids': num_train_pids, - 'num_query_pids': num_pids - num_train_pids, - 'num_gallery_pids': num_pids - num_train_pids - } + split = { + 'train': train, + 'query': test_b, + 'gallery': test_a, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } splits.append(split) print('Totally {} splits are created'.format(len(splits))) write_json(splits, self.split_path) - print('Split file saved to {}'.format(self.split_path)) - - print('Splits created') \ No newline at end of file + print('Split file saved to {}'.format(self.split_path)) \ No newline at end of file