# encoding: utf-8 """ @author: liaoxingyu @contact: liaoxingyu2@jd.com """ import glob import re import urllib import zipfile import os.path as osp from utils.iotools import mkdir_if_missing from .bases import BaseImageDataset class DukeMTMCreID(BaseImageDataset): """ DukeMTMC-reID Reference: 1. Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016. 2. Zheng et al. Unlabeled Samples Generated by GAN Improve the Person Re-identification Baseline in vitro. ICCV 2017. URL: https://github.com/layumi/DukeMTMC-reID_evaluation Dataset statistics: # identities: 1404 (train + query) # images:16522 (train) + 2228 (query) + 17661 (gallery) # cameras: 8 """ dataset_dir = 'dukemtmc-reid' def __init__(self, root='/home/haoluo/data', verbose=True, **kwargs): super(DukeMTMCreID, self).__init__() self.dataset_dir = osp.join(root, self.dataset_dir) self.dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-reID.zip' self.train_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/bounding_box_train') self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/query') self.gallery_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/bounding_box_test') self._download_data() self._check_before_run() train = self._process_dir(self.train_dir, relabel=True) query = self._process_dir(self.query_dir, relabel=False) gallery = self._process_dir(self.gallery_dir, relabel=False) if verbose: print("=> DukeMTMC-reID loaded") self.print_dataset_statistics(train, query, gallery) self.train = train self.query = query self.gallery = gallery self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train) self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) def _download_data(self): if osp.exists(self.dataset_dir): print("This dataset has been downloaded.") return print("Creating directory {}".format(self.dataset_dir)) mkdir_if_missing(self.dataset_dir) fpath = osp.join(self.dataset_dir, osp.basename(self.dataset_url)) print("Downloading DukeMTMC-reID dataset") urllib.urlretrieve(self.dataset_url, fpath) print("Extracting files") zip_ref = zipfile.ZipFile(fpath, 'r') zip_ref.extractall(self.dataset_dir) zip_ref.close() def _check_before_run(self): """Check if all files are available before going deeper""" if not osp.exists(self.dataset_dir): raise RuntimeError("'{}' is not available".format(self.dataset_dir)) if not osp.exists(self.train_dir): raise RuntimeError("'{}' is not available".format(self.train_dir)) if not osp.exists(self.query_dir): raise RuntimeError("'{}' is not available".format(self.query_dir)) if not osp.exists(self.gallery_dir): raise RuntimeError("'{}' is not available".format(self.gallery_dir)) def _process_dir(self, dir_path, relabel=False): img_paths = glob.glob(osp.join(dir_path, '*.jpg')) pattern = re.compile(r'([-\d]+)_c(\d)') pid_container = set() for img_path in img_paths: pid, _ = map(int, pattern.search(img_path).groups()) pid_container.add(pid) pid2label = {pid: label for label, pid in enumerate(pid_container)} dataset = [] for img_path in img_paths: pid, camid = map(int, pattern.search(img_path).groups()) assert 1 <= camid <= 8 camid -= 1 # index starts from 0 if relabel: pid = pid2label[pid] dataset.append((img_path, pid, camid)) return dataset