deep-person-reid/torchreid/datasets/market1501.py

99 lines
3.7 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import glob
import re
import sys
import urllib
import tarfile
import zipfile
import os.path as osp
from scipy.io import loadmat
import numpy as np
import h5py
from scipy.misc import imsave
from .bases import BaseImageDataset
class Market1501(BaseImageDataset):
"""
Market1501
Reference:
Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015.
URL: http://www.liangzheng.org/Project/project_reid.html
Dataset statistics:
# identities: 1501 (+1 for background)
# images: 12936 (train) + 3368 (query) + 15913 (gallery)
"""
dataset_dir = 'market1501'
def __init__(self, root='data', verbose=True, market1501_500k=False, **kwargs):
super(Market1501, self).__init__(root)
self.dataset_dir = osp.join(self.root, self.dataset_dir)
self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train')
self.query_dir = osp.join(self.dataset_dir, 'query')
self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test')
self.extra_gallery_dir = osp.join(self.dataset_dir, 'images')
self.market1501_500k = market1501_500k
self._check_before_run()
train = self._process_dir(self.train_dir, relabel=True)
query = self._process_dir(self.query_dir, relabel=False)
gallery = self._process_dir(self.gallery_dir, relabel=False)
if self.market1501_500k:
gallery += self._process_dir(self.extra_gallery_dir, relabel=False)
if verbose:
print('=> Market1501 loaded')
self.print_dataset_statistics(train, query, gallery)
self.train = train
self.query = query
self.gallery = gallery
self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train)
self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)
def _check_before_run(self):
"""Check if all files are available before going deeper"""
if not osp.exists(self.dataset_dir):
raise RuntimeError('"{}" is not available'.format(self.dataset_dir))
if not osp.exists(self.train_dir):
raise RuntimeError('"{}" is not available'.format(self.train_dir))
if not osp.exists(self.query_dir):
raise RuntimeError('"{}" is not available'.format(self.query_dir))
if not osp.exists(self.gallery_dir):
raise RuntimeError('"{}" is not available'.format(self.gallery_dir))
if self.market1501_500k and not osp.exists(self.extra_gallery_dir):
raise RuntimeError('"{}" is not available'.format(self.extra_gallery_dir))
def _process_dir(self, dir_path, relabel=False):
img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
pattern = re.compile(r'([-\d]+)_c(\d)')
pid_container = set()
for img_path in img_paths:
pid, _ = map(int, pattern.search(img_path).groups())
if pid == -1: continue # junk images are just ignored
pid_container.add(pid)
pid2label = {pid:label for label, pid in enumerate(pid_container)}
dataset = []
for img_path in img_paths:
pid, camid = map(int, pattern.search(img_path).groups())
if pid == -1: continue # junk images are just ignored
assert 0 <= pid <= 1501 # pid == 0 means background
assert 1 <= camid <= 6
camid -= 1 # index starts from 0
if relabel: pid = pid2label[pid]
dataset.append((img_path, pid, camid))
return dataset