# encoding: utf-8
"""
@author: liaoxingyu
@contact: liaoxingyu2@jd.com
"""

import h5py
import os.path as osp

from scipy.io import loadmat
# scipy.misc.imsave was removed in SciPy 1.2; imageio.imwrite is a
# drop-in replacement for saving arrays as images
from imageio import imwrite as imsave

from utils.iotools import mkdir_if_missing, write_json, read_json
from .bases import BaseImageDataset


class CUHK03(BaseImageDataset):
    """
    CUHK03

    Reference:
        Li et al. DeepReID: Deep Filter Pairing Neural Network for Person Re-identification. CVPR 2014.

    URL: http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html#!

    Dataset statistics:
        # identities: 1360
        # images: 13164
        # cameras: 6
        # splits: 20 (classic)

    Args:
        split_id (int): split index (default: 0)
        cuhk03_labeled (bool): whether to load labeled images; if false, detected images are loaded (default: False)
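
    Example:
        # a minimal usage sketch; the root path here is a placeholder, not a repo default
        dataset = CUHK03(root='/path/to/data', split_id=0, cuhk03_labeled=False)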
    """
    dataset_dir = 'cuhk03'

    def __init__(self, root='/home/haoluo/data', split_id=0, cuhk03_labeled=False,
                 cuhk03_classic_split=False, verbose=True,
                 **kwargs):
        super(CUHK03, self).__init__()
        self.dataset_dir = osp.join(root, self.dataset_dir)
        self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release')
        self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')

        self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected')
        self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled')

        self.split_classic_det_json_path = osp.join(self.dataset_dir, 'splits_classic_detected.json')
        self.split_classic_lab_json_path = osp.join(self.dataset_dir, 'splits_classic_labeled.json')

        self.split_new_det_json_path = osp.join(self.dataset_dir, 'splits_new_detected.json')
        self.split_new_lab_json_path = osp.join(self.dataset_dir, 'splits_new_labeled.json')

        self.split_new_det_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat')
        self.split_new_lab_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat')

        self._check_before_run()
        self._preprocess()

        if cuhk03_labeled:
            image_type = 'labeled'
            split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
        else:
            image_type = 'detected'
            split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path

        splits = read_json(split_path)
        assert split_id < len(splits), "Condition split_id ({}) < len(splits) ({}) is false".format(split_id, len(splits))
        split = splits[split_id]
        print("Split index = {}".format(split_id))

        train = split['train']
        query = split['query']
        gallery = split['gallery']

        if verbose:
            print("=> CUHK03 ({}) loaded".format(image_type))
            self.print_dataset_statistics(train, query, gallery)

        self.train = train
        self.query = query
        self.gallery = gallery

        self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train)
        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)

    def _check_before_run(self):
        """Check if all files are available before going deeper"""
        if not osp.exists(self.dataset_dir):
            raise RuntimeError("'{}' is not available".format(self.dataset_dir))
        if not osp.exists(self.data_dir):
            raise RuntimeError("'{}' is not available".format(self.data_dir))
        if not osp.exists(self.raw_mat_path):
            raise RuntimeError("'{}' is not available".format(self.raw_mat_path))
        if not osp.exists(self.split_new_det_mat_path):
            raise RuntimeError("'{}' is not available".format(self.split_new_det_mat_path))
        if not osp.exists(self.split_new_lab_mat_path):
            raise RuntimeError("'{}' is not available".format(self.split_new_lab_mat_path))

    def _preprocess(self):
        """
        This function is a bit complex and ugly. What it does:
        1. Extract data from cuhk-03.mat and save as png images.
        2. Create the 20 classic splits (Li et al. CVPR'14).
        3. Create the new split (Zhong et al. CVPR'17).
        """
        print("Note: if root path is changed, the previously generated json files need to be re-generated (delete them first)")
        if osp.exists(self.imgs_labeled_dir) and \
                osp.exists(self.imgs_detected_dir) and \
                osp.exists(self.split_classic_det_json_path) and \
                osp.exists(self.split_classic_lab_json_path) and \
                osp.exists(self.split_new_det_json_path) and \
                osp.exists(self.split_new_lab_json_path):
            return

        mkdir_if_missing(self.imgs_detected_dir)
        mkdir_if_missing(self.imgs_labeled_dir)

        print("Extract image data from {} and save as png".format(self.raw_mat_path))
        mat = h5py.File(self.raw_mat_path, 'r')
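
        # An assumption about the storage format: cuhk-03.mat is a MATLAB v7.3
        # (HDF5) file, so cells are stored as HDF5 object references and arrays
        # are saved column-major; _deref resolves a reference and transposes back.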
        def _deref(ref):
            return mat[ref][:].T

        def _process_images(img_refs, campid, pid, save_dir):
            img_paths = []  # Note: some persons only have images for one view
            for imgid, img_ref in enumerate(img_refs):
                img = _deref(img_ref)
                # skip empty cell
                if img.size == 0 or img.ndim < 3:
                    continue
                # images are saved with the following name format (1-based indices ensure uniqueness)
                # campid: index of camera pair (1-5)
                # pid: index of person in 'campid'-th camera pair
                # viewid: index of view, {1, 2}
                # imgid: index of image, (1-10)
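                # each identity has up to 10 images, the first 5 from view 1 and
                # the rest from view 2; e.g. campid=0, pid=12, imgid=3 gives
                # viewid=1 and the file name '1_013_1_04.png'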
                viewid = 1 if imgid < 5 else 2
                img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid + 1, pid + 1, viewid, imgid + 1)
                img_path = osp.join(save_dir, img_name)
                if not osp.isfile(img_path):
                    imsave(img_path, img)
                img_paths.append(img_path)
            return img_paths

        def _extract_img(name):
            print("Processing {} images (extract and save) ...".format(name))
            meta_data = []
            imgs_dir = self.imgs_detected_dir if name == 'detected' else self.imgs_labeled_dir
            for campid, camp_ref in enumerate(mat[name][0]):
                camp = _deref(camp_ref)
                num_pids = camp.shape[0]
                for pid in range(num_pids):
                    img_paths = _process_images(camp[pid, :], campid, pid, imgs_dir)
                    assert len(img_paths) > 0, "campid{}-pid{} has no images".format(campid, pid)
                    meta_data.append((campid + 1, pid + 1, img_paths))
                print("- done camera pair {} with {} identities".format(campid + 1, num_pids))
            return meta_data

        meta_detected = _extract_img('detected')
        meta_labeled = _extract_img('labeled')

        def _extract_classic_split(meta_data, test_split):
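            # test_split is a list of (1-based) [campid, pid] pairs taken from
            # mat['testsets']; identities listed there go to query/gallery,
            # all others go to train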
            train, test = [], []
            num_train_pids, num_test_pids = 0, 0
            num_train_imgs, num_test_imgs = 0, 0
            for i, (campid, pid, img_paths) in enumerate(meta_data):
                if [campid, pid] in test_split:
                    for img_path in img_paths:
                        camid = int(osp.basename(img_path).split('_')[2]) - 1  # make it 0-based
                        test.append((img_path, num_test_pids, camid))
                    num_test_pids += 1
                    num_test_imgs += len(img_paths)
                else:
                    for img_path in img_paths:
                        camid = int(osp.basename(img_path).split('_')[2]) - 1  # make it 0-based
                        train.append((img_path, num_train_pids, camid))
                    num_train_pids += 1
                    num_train_imgs += len(img_paths)
            return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs

print("Creating classic splits (# = 20) ...")
|
|
splits_classic_det, splits_classic_lab = [], []
|
|
for split_ref in mat['testsets'][0]:
|
|
test_split = _deref(split_ref).tolist()
|
|
|
|
# create split for detected images
|
|
train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
|
|
_extract_classic_split(meta_detected, test_split)
|
|
splits_classic_det.append({
|
|
'train': train, 'query': test, 'gallery': test,
|
|
'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs,
|
|
'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs,
|
|
'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs,
|
|
})
|
|
|
|
# create split for labeled images
|
|
train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
|
|
_extract_classic_split(meta_labeled, test_split)
|
|
splits_classic_lab.append({
|
|
'train': train, 'query': test, 'gallery': test,
|
|
'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs,
|
|
'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs,
|
|
'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs,
|
|
})
|
|
|
|
write_json(splits_classic_det, self.split_classic_det_json_path)
|
|
write_json(splits_classic_lab, self.split_classic_lab_json_path)
|
|
|
|
        def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel):
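            # each filelist entry comes from scipy.io.loadmat, which wraps MATLAB
            # strings in object arrays (an assumption about the .mat layout),
            # hence the [0] below to unwrap the file name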
            tmp_set = []
            unique_pids = set()
            for idx in idxs:
                img_name = filelist[idx][0]
                camid = int(img_name.split('_')[2]) - 1  # make it 0-based
                pid = pids[idx]
                if relabel:
                    pid = pid2label[pid]
                img_path = osp.join(img_dir, img_name)
                tmp_set.append((img_path, int(pid), camid))
                unique_pids.add(pid)
            return tmp_set, len(unique_pids), len(idxs)

        def _extract_new_split(split_dict, img_dir):
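            # the new protocol (Zhong et al. CVPR'17) fixes a single 767 (train) /
            # 700 (test) identity split; the .mat stores 1-based MATLAB indices,
            # hence the "- 1" conversions below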
            train_idxs = split_dict['train_idx'].flatten() - 1  # index-0
            pids = split_dict['labels'].flatten()
            train_pids = set(pids[train_idxs])
            pid2label = {pid: label for label, pid in enumerate(train_pids)}
            query_idxs = split_dict['query_idx'].flatten() - 1
            gallery_idxs = split_dict['gallery_idx'].flatten() - 1
            filelist = split_dict['filelist'].flatten()
            train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True)
            query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False)
            gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False)
            return train_info, query_info, gallery_info

        print("Creating new splits for detected images (767/700) ...")
        train_info, query_info, gallery_info = _extract_new_split(
            loadmat(self.split_new_det_mat_path),
            self.imgs_detected_dir,
        )
        splits = [{
            'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0],
            'num_train_pids': train_info[1], 'num_train_imgs': train_info[2],
            'num_query_pids': query_info[1], 'num_query_imgs': query_info[2],
            'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2],
        }]
        write_json(splits, self.split_new_det_json_path)

        print("Creating new splits for labeled images (767/700) ...")
        train_info, query_info, gallery_info = _extract_new_split(
            loadmat(self.split_new_lab_mat_path),
            self.imgs_labeled_dir,
        )
        splits = [{
            'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0],
            'num_train_pids': train_info[1], 'num_train_imgs': train_info[2],
            'num_query_pids': query_info[1], 'num_query_imgs': query_info[2],
            'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2],
        }]
        write_json(splits, self.split_new_lab_json_path)