v1.4.0: added cuhksysu

pull/462/head^2
kaiyangzhou 2021-04-27 15:31:30 +08:00
parent 5bd26d8aa2
commit c4639e91a0
7 changed files with 88 additions and 3 deletions

README.rst

@@ -33,6 +33,7 @@ You can find some research projects that are built on top of Torchreid `here <ht
What's new
------------
- [Apr 2021] ``v1.4.0``: We added the person search dataset, `CUHK-SYSU <http://www.ee.cuhk.edu.hk/~xgwang/PS/dataset.html>`_. Please see the `documentation <https://kaiyangzhou.github.io/deep-person-reid/>`_ on how to download the dataset (it contains cropped person images). A minimal loading sketch follows this list.
- [Apr 2021] All models in the model zoo have been moved to Google Drive. Please raise an issue if any model's performance is inconsistent with the numbers shown on the model zoo page (this could be caused by broken links).
- [Mar 2021] `OSNet <https://arxiv.org/abs/1910.06827>`_ will appear in the TPAMI journal! Compared with the conference version, which focuses on discriminative feature learning using the omni-scale building block, this journal extension further considers generalizable feature learning by integrating `instance normalization layers <https://arxiv.org/abs/1607.08022>`_ with the OSNet architecture. We hope this journal paper can motivate more future work to tackle the generalization issue in cross-dataset re-ID.
- [Mar 2021] Generalization across domains (datasets) in person re-ID is crucial for real-world applications and is closely related to the topic of *domain generalization*. Interested in learning how the field of domain generalization has developed over the last decade? Check out our recent survey on this topic at https://arxiv.org/abs/2103.02503, which covers the history, datasets, related problems, methodologies, potential directions, and more (*methods designed for generalizable re-ID are also covered*!).
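To make the new dataset concrete, here is a minimal loading sketch. ``ImageDataManager`` is Torchreid's standard data entry point; the root path and the companion source ``market1501`` below are illustrative choices, since CUHK-SYSU is train-only and needs another dataset to supply query/gallery splits.

.. code-block:: python

    import torchreid

    # 'cuhksysu' is registered as train-only, so pair it with a source
    # that provides query/gallery data for evaluation.
    datamanager = torchreid.data.ImageDataManager(
        root='reid-data',  # the $REID folder that contains cuhksysu/
        sources=['cuhksysu', 'market1501'],
        targets='market1501',
        height=256,
        width=128,
        combineall=True  # silently skipped for cuhksysu (train-only)
    )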

docs/datasets.rst

@@ -189,6 +189,19 @@ CUHK02 (``cuhk02``)
P4/
P5/
CUHKSYSU (``cuhksysu``)
^^^^^^^^^^^^^^^^^^^^^^^^^^
- Create a folder named "cuhksysu" under ``$REID``.
- Download the data to "cuhksysu/" from this `Google Drive link <https://drive.google.com/file/d/1XmiNVrfK2ZmI0ZZ2HHT80HHbDrnE4l3W/view?usp=sharing>`_.
- Extract the zip file under "cuhksysu/".
- The data structure should look like the tree below (a quick sanity-check snippet follows)

.. code-block:: none

    cuhksysu/
        cropped_images
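After extraction, a short check can confirm the layout. This is just a sketch: the expected counts come from the dataset statistics in ``cuhksysu.py``, and the root path is illustrative.

.. code-block:: python

    import glob
    import os.path as osp

    root = osp.expanduser('~/reid-data')  # your $REID
    img_paths = glob.glob(osp.join(root, 'cuhksysu', 'cropped_images', '*.jpg'))
    # filenames look like p11422_s16929_1.jpg; the leading token is the person id
    pids = {osp.basename(p).split('_')[0] for p in img_paths}
    print(len(img_paths), 'images,', len(pids), 'identities')
    # expected: 34574 images, 11934 identities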
Video Datasets
--------------

torchreid/__init__.py

@@ -2,7 +2,7 @@ from __future__ import print_function, absolute_import
from torchreid import data, optim, utils, engine, losses, models, metrics
-__version__ = '1.3.6'
+__version__ = '1.4.0'
__author__ = 'Kaiyang Zhou'
__homepage__ = 'https://kaiyangzhou.github.io/'
__description__ = 'Deep learning person re-identification in PyTorch'

torchreid/data/datasets/__init__.py

@@ -2,7 +2,7 @@ from __future__ import print_function, absolute_import
from .image import (
    GRID, PRID, CUHK01, CUHK02, CUHK03, MSMT17, VIPeR, SenseReID, Market1501,
-    DukeMTMCreID, University1652, iLIDS
+    DukeMTMCreID, University1652, iLIDS, CUHKSYSU
)
from .video import PRID2011, Mars, DukeMTMCVidReID, iLIDSVID
from .dataset import Dataset, ImageDataset, VideoDataset
@@ -19,7 +19,8 @@ __image_datasets = {
    'sensereid': SenseReID,
    'prid': PRID,
    'cuhk02': CUHK02,
-    'university1652': University1652
+    'university1652': University1652,
+    'cuhksysu': CUHKSYSU
}
__video_datasets = {
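For context, the ``__image_datasets`` registry above is what name-based lookup consults. The module's ``init_image_dataset`` helper performs the lookup; the body below is a paraphrased sketch, not the verbatim source.

.. code-block:: python

    def init_image_dataset(name, **kwargs):
        # Paraphrased sketch of the registry lookup in this module.
        avai_datasets = list(__image_datasets.keys())
        if name not in avai_datasets:
            raise ValueError(
                'Invalid dataset name "{}"; expected one of {}'.format(
                    name, avai_datasets
                )
            )
        return __image_datasets[name](**kwargs)

    # e.g. init_image_dataset('cuhksysu', root='reid-data') now resolves
    # to the new CUHKSYSU class.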

torchreid/data/datasets/dataset.py

@@ -35,6 +35,10 @@ class Dataset(object):
    # combineall=True
    _junk_pids = []

+    # Some datasets are only used for training, like CUHK-SYSU.
+    # In this case, "combineall=True" is not used for them.
+    _train_only = False

    def __init__(
        self,
        train,
@@ -180,6 +184,9 @@ class Dataset(object):
    def combine_all(self):
        """Combines train, query and gallery in a dataset for training."""
+        if self._train_only:
+            return
+
        combined = copy.deepcopy(self.train)
        # relabel pids in gallery (query shares the same scope)

torchreid/data/datasets/image/__init__.py

@@ -7,6 +7,7 @@ from .viper import VIPeR
from .cuhk01 import CUHK01
from .cuhk02 import CUHK02
from .cuhk03 import CUHK03
+from .cuhksysu import CUHKSYSU
from .msmt17 import MSMT17
from .sensereid import SenseReID
from .market1501 import Market1501

torchreid/data/datasets/image/cuhksysu.py

@@ -0,0 +1,62 @@
from __future__ import division, print_function, absolute_import
import os.path as osp
import glob
import copy

from ..dataset import ImageDataset


class CUHKSYSU(ImageDataset):
    """CUHKSYSU.

    This dataset can only be used for model training.

    Reference:
        Xiao et al. End-to-end deep learning for person search.

    URL: `<http://www.ee.cuhk.edu.hk/~xgwang/PS/dataset.html>`_

    Dataset statistics:
        - identities: 11,934
        - images: 34,574
    """
    _train_only = True
    dataset_dir = 'cuhksysu'

    def __init__(self, root='', **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.data_dir = osp.join(self.dataset_dir, 'cropped_images')

        # image name format: p11422_s16929_1.jpg
        train = self.process_dir(self.data_dir)
        # train-only dataset: query/gallery are dummy placeholders
        query = [copy.deepcopy(train[0])]
        gallery = [copy.deepcopy(train[0])]

        super(CUHKSYSU, self).__init__(train, query, gallery, **kwargs)
    def process_dir(self, dirname):
        img_paths = glob.glob(osp.join(dirname, '*.jpg'))

        # collect all identities
        pid_container = set()
        for img_path in img_paths:
            img_name = osp.basename(img_path)
            pid = img_name.split('_')[0]
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        # extract data
        data = []
        for img_path in img_paths:
            img_name = osp.basename(img_path)
            pid = img_name.split('_')[0]
            label = pid2label[pid]
            data.append((img_path, label, 0))  # dummy camera id

        return data
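To illustrate the parsing in ``process_dir``, here is a tiny sketch with made-up filenames in the documented ``p<pid>_s<scene>_<index>.jpg`` format:

.. code-block:: python

    names = ['p11422_s16929_1.jpg', 'p11422_s16930_1.jpg', 'p00007_s00001_1.jpg']

    pids = {n.split('_')[0] for n in names}        # {'p11422', 'p00007'}
    pid2label = {pid: i for i, pid in enumerate(pids)}
    labels = [pid2label[n.split('_')[0]] for n in names]
    # two distinct identities -> labels in {0, 1}; both crops of
    # p11422 receive the same training label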