from __future__ import absolute_import

import os
import glob
import re
import sys
import os.path as osp

# Dataset classes


class Market1501(object):
    """Market1501 person re-identification dataset.

    Reference:
    Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015.

    ==========================
    Dataset statistics:
    # identities: 1501 (+1 for background)
    # images: 12936 (train) + 3368 (query) + 15913 (gallery) = 32217

    Args:
        root: optional dataset directory. When omitted, the class-level
            default ``root`` (and the sub-directories derived from it) is
            used, exactly as before.

    Attributes set by ``__init__``:
        train, query, gallery: lists of ``(img_path, pid, camid)`` tuples.
        num_train_pids, num_query_pids, num_gallery_pids: number of distinct
            identities found in each subset.
    """

    root = './data/market1501'
    train_dir = osp.join(root, 'bounding_box_train')
    query_dir = osp.join(root, 'query')
    gallery_dir = osp.join(root, 'bounding_box_test')

    def __init__(self, root=None):
        # An explicit root overrides the class-level defaults on this
        # instance only; the class attributes themselves stay untouched.
        if root is not None:
            self.root = root
            self.train_dir = osp.join(root, 'bounding_box_train')
            self.query_dir = osp.join(root, 'query')
            self.gallery_dir = osp.join(root, 'bounding_box_test')

        for dir_path in (self.root, self.train_dir,
                         self.query_dir, self.gallery_dir):
            self._check_dir(dir_path)

        train, num_train_pids, num_train_imgs = self._process_dir(self.train_dir, relabel=True)
        query, num_query_pids, num_query_imgs = self._process_dir(self.query_dir, relabel=False)
        gallery, num_gallery_pids, num_gallery_imgs = self._process_dir(self.gallery_dir, relabel=False)

        # NOTE(review): gallery ids are not added to the total, presumably
        # because query identities also appear in the gallery -- confirm.
        num_total_pids = num_train_pids + num_query_pids
        num_total_imgs = num_train_imgs + num_query_imgs + num_gallery_imgs

        print("=> Market1501 loaded")
        print("Dataset statistics:")
        print(" ------------------------------")
        print(" subset | # ids | # images")
        print(" ------------------------------")
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print(" ------------------------------")
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print(" ------------------------------")

        self.train = train
        self.query = query
        self.gallery = gallery

        self.num_train_pids = num_train_pids
        self.num_query_pids = num_query_pids
        self.num_gallery_pids = num_gallery_pids

    def _process_dir(self, dir_path, relabel=False):
        """Parse every ``*.jpg`` in ``dir_path`` into dataset records.

        Args:
            dir_path: directory holding the images.
            relabel: when True, person ids are remapped to contiguous labels
                ``0..num_pids-1`` assigned in ascending pid order.

        Returns:
            ``(dataset, num_pids, num_imgs)`` where ``dataset`` is a list of
            ``(img_path, pid, camid)`` tuples (camid is zero-based), sorted
            by image path.

        Raises:
            ValueError: if a file name does not contain ``<pid>_c<camid>``.
            AssertionError: if a pid or camid is outside the expected range.
        """
        print("Processing directory '{}'".format(dir_path))
        # glob order is filesystem dependent; sort for reproducible output.
        img_paths = sorted(glob.glob(osp.join(dir_path, '*.jpg')))
        pattern = re.compile(r'([-\d]+)_c(\d)')

        parsed = []
        for img_path in img_paths:
            # Match on the file name only, so that directory names which
            # happen to look like "<digits>_c<digit>" cannot be mistaken
            # for the identity/camera tag.
            match = pattern.search(osp.basename(img_path))
            if match is None:
                raise ValueError(
                    "Cannot parse person/camera id from '{}'".format(img_path))
            pid, camid = map(int, match.groups())
            if pid == -1:
                continue  # junk images are just ignored
            assert 0 <= pid <= 1501  # pid == 0 means background
            assert 1 <= camid <= 6
            parsed.append((img_path, pid, camid - 1))  # camera index starts from 0

        pid_container = set(pid for _, pid, _ in parsed)
        # Sorting makes the pid -> label mapping independent of set/glob order.
        pid2label = {pid: label for label, pid in enumerate(sorted(pid_container))}

        if relabel:
            dataset = [(img_path, pid2label[pid], camid)
                       for img_path, pid, camid in parsed]
        else:
            dataset = parsed

        num_pids = len(pid_container)
        num_imgs = len(dataset)
        return dataset, num_pids, num_imgs

    def _check_dir(self, dir_path):
        """Terminate the program if ``dir_path`` does not exist.

        Raises:
            SystemExit: with exit status 1 when the path is missing.
        """
        if not osp.exists(dir_path):
            print("Error: '{}' is not available.".format(dir_path))
            # A bare sys.exit() would report success (status 0) to the shell.
            sys.exit(1)


# Create dataset

__factory = {
    'market1501': Market1501,
}


def get_names():
    """Return the names of all registered datasets."""
    return __factory.keys()


def init_dataset(name, *args, **kwargs):
    """Instantiate the dataset registered under ``name``.

    Extra positional and keyword arguments are forwarded to the dataset
    constructor.

    Raises:
        KeyError: if ``name`` is not a registered dataset.
    """
    if name not in __factory:
        raise KeyError("Unknown dataset: {}".format(name))
    return __factory[name](*args, **kwargs)


if __name__ == '__main__':
    dataset = Market1501()