add Market1501Attr and DukeMTMCAttr dataloaders

pull/406/head
Jinkai Zheng 2021-02-05 22:36:54 +08:00
parent c9537c97d1
commit 39e25e0457
5 changed files with 188 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
_BASE_: Base-attribute.yml

DATASETS:
  NAMES: ("DukeMTMCAttr",)
  TESTS: ("DukeMTMCAttr",)

MODEL:
  HEADS:
    NUM_CLASSES: 23

OUTPUT_DIR: projects/FastAttr/logs/dukemtmc/strong_baseline

View File

@@ -0,0 +1,11 @@
_BASE_: Base-attribute.yml

DATASETS:
  NAMES: ("Market1501Attr",)
  TESTS: ("Market1501Attr",)

MODEL:
  HEADS:
    NUM_CLASSES: 27

OUTPUT_DIR: projects/FastAttr/logs/market1501/strong_baseline
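In both configs, MODEL.HEADS.NUM_CLASSES is the number of attribute fields in the corresponding annotation file: 23 for duke_attribute.mat and 27 for market_attribute.mat, i.e. every field except image_index. A small sanity check, sketched here with illustrative config and dataset paths (the file names below are not taken from this commit):

import mat4py
import yaml  # PyYAML

def num_attrs(mat_path, key):
    # Count the attribute fields; 'image_index' holds person ids, not an attribute.
    train_annot = mat4py.loadmat(mat_path)[key]['train']
    return sum(1 for k in train_annot if k != 'image_index')

checks = [
    ('projects/FastAttr/configs/dukemtmcattr.yml',
     'datasets/DukeMTMC-reID/duke_attribute.mat', 'duke_attribute'),
    ('projects/FastAttr/configs/market1501attr.yml',
     'datasets/Market-1501-v15.09.15/market_attribute.mat', 'market_attribute'),
]
for cfg_path, mat_path, key in checks:
    with open(cfg_path) as f:
        cfg = yaml.safe_load(f)
    assert cfg['MODEL']['HEADS']['NUM_CLASSES'] == num_attrs(mat_path, key)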

View File

@@ -6,3 +6,5 @@
# Attributed datasets
from .pa100k import PA100K
from .market1501attr import Market1501Attr
from .dukemtmcattr import DukeMTMCAttr
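These imports make the two loaders visible to fastreid's dataset registry as soon as this datasets package is imported (the @DATASET_REGISTRY.register() decorators in the files below do the actual registration), so the names used in the configs above should resolve to the classes. A minimal sketch, assuming the FastAttr datasets package has already been imported and the data sits under datasets/:

from fastreid.data.datasets import DATASET_REGISTRY

# Resolve the class by the name used in DATASETS.NAMES and build it.
dataset_cls = DATASET_REGISTRY.get("DukeMTMCAttr")
dataset = dataset_cls(root="datasets")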

View File

@@ -0,0 +1,74 @@
# encoding: utf-8
"""
@author: liaoxingyu
@contact: liaoxingyu2@jd.com
"""

import glob
import os.path as osp
import re

import mat4py
import numpy as np

from fastreid.data.datasets import DATASET_REGISTRY
from .bases import Dataset


@DATASET_REGISTRY.register()
class DukeMTMCAttr(Dataset):
    """DukeMTMCAttr.

    Reference:
        Lin, Yutian, et al. "Improving person re-identification by attribute and identity learning."
        Pattern Recognition 95 (2019): 151-161.

    URL: `<https://github.com/vana77/DukeMTMC-attribute>`_

    The folder structure should be:
        DukeMTMC-reID/
            bounding_box_train/ # images
            bounding_box_test/ # images
            duke_attribute.mat
    """
    dataset_dir = 'DukeMTMC-reID'
    dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-reID.zip'
    dataset_name = "dukemtmc"

    def __init__(self, root='datasets', **kwargs):
        self.root = root
        self.dataset_dir = osp.join(self.root, self.dataset_dir)

        self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train')
        self.test_dir = osp.join(self.dataset_dir, 'bounding_box_test')

        required_files = [
            self.dataset_dir,
            self.train_dir,
            self.test_dir,
        ]
        self.check_before_run(required_files)

        duke_attr = mat4py.loadmat(osp.join(self.dataset_dir, 'duke_attribute.mat'))['duke_attribute']
        # Sort the attribute names so the label order is deterministic;
        # 'image_index' holds the person ids and is not an attribute.
        sorted_attrs = sorted(duke_attr['train'].keys())
        sorted_attrs.remove('image_index')
        attr_dict = {i: str(attr) for i, attr in enumerate(sorted_attrs)}

        train = self.process_dir(self.train_dir, duke_attr['train'], sorted_attrs)
        test = val = self.process_dir(self.test_dir, duke_attr['test'], sorted_attrs)

        super(DukeMTMCAttr, self).__init__(train, val, test, attr_dict=attr_dict, **kwargs)

    def process_dir(self, dir_path, annotation, sorted_attrs):
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        pattern = re.compile(r'([-\d]+)_c(\d)')

        data = []
        for img_path in img_paths:
            pid, camid = map(int, pattern.search(img_path).groups())
            assert 1 <= camid <= 8
            # Attributes are annotated per identity: look up this pid's row and
            # shift the 1-based labels in the .mat file down to start at 0.
            img_index = annotation['image_index'].index(str(pid).zfill(4))
            attrs = np.array([int(annotation[i][img_index]) - 1 for i in sorted_attrs], dtype=np.float32)
            data.append((img_path, attrs))

        return data
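The attribute labels in duke_attribute.mat are annotated per identity, not per image: mat4py.loadmat turns the MATLAB struct into nested dicts of plain Python lists, and process_dir looks up each image's person id in the image_index list to fetch that identity's attribute row. A self-contained sketch of that lookup with mocked annotation data (the field names are real Duke attributes, the values are made up):

import numpy as np

# Rough shape of mat4py.loadmat(...)['duke_attribute']['train']:
# one list per attribute, aligned with the 'image_index' list of person ids.
annotation = {
    'image_index': ['0001', '0002', '0003'],
    'backpack':    [1, 2, 1],   # labels are 1-based in the .mat file
    'hat':         [2, 1, 1],
}
sorted_attrs = sorted(k for k in annotation if k != 'image_index')

pid = 2  # parsed from a filename such as 0002_c3_f0123456.jpg
img_index = annotation['image_index'].index(str(pid).zfill(4))
attrs = np.array([int(annotation[a][img_index]) - 1 for a in sorted_attrs],
                 dtype=np.float32)
print(attrs)  # [1. 0.], one entry per attribute in sorted order ('backpack', 'hat')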

View File

@@ -0,0 +1,90 @@
# encoding: utf-8
"""
@author: sherlock
@contact: sherlockliao01@gmail.com
"""

import glob
import os.path as osp
import re
import warnings

import mat4py
import numpy as np

from fastreid.data.datasets import DATASET_REGISTRY
from .bases import Dataset


@DATASET_REGISTRY.register()
class Market1501Attr(Dataset):
    """Market1501Attr.

    Reference:
        Lin, Yutian, et al. "Improving person re-identification by attribute and identity learning."
        Pattern Recognition 95 (2019): 151-161.

    URL: `<https://github.com/vana77/Market-1501_Attribute>`_

    The folder structure should be:
        Market-1501-v15.09.15/
            bounding_box_train/ # images
            bounding_box_test/ # images
            market_attribute.mat
    """
    _junk_pids = [0, -1]
    dataset_dir = ''
    dataset_url = 'http://188.138.127.15:81/Datasets/Market-1501-v15.09.15.zip'
    dataset_name = "market1501"

    def __init__(self, root='datasets', market1501_500k=False, **kwargs):
        self.root = root
        self.dataset_dir = osp.join(self.root, self.dataset_dir)

        # allow alternative directory structure
        self.data_dir = self.dataset_dir
        data_dir = osp.join(self.data_dir, 'Market-1501-v15.09.15')
        if osp.isdir(data_dir):
            self.data_dir = data_dir
        else:
            warnings.warn('The current data structure is deprecated. Please '
                          'put data folders such as "bounding_box_train" under '
                          '"Market-1501-v15.09.15".')

        self.train_dir = osp.join(self.data_dir, 'bounding_box_train')
        self.test_dir = osp.join(self.data_dir, 'bounding_box_test')

        required_files = [
            self.data_dir,
            self.train_dir,
            self.test_dir,
        ]
        self.check_before_run(required_files)

        market_attr = mat4py.loadmat(osp.join(self.data_dir, 'market_attribute.mat'))['market_attribute']
        # Sort the attribute names so the label order is deterministic;
        # 'image_index' holds the person ids and is not an attribute.
        sorted_attrs = sorted(market_attr['train'].keys())
        sorted_attrs.remove('image_index')
        attr_dict = {i: str(attr) for i, attr in enumerate(sorted_attrs)}

        train = self.process_dir(self.train_dir, market_attr['train'], sorted_attrs)
        test = val = self.process_dir(self.test_dir, market_attr['test'], sorted_attrs)

        super(Market1501Attr, self).__init__(train, val, test, attr_dict=attr_dict, **kwargs)

    def process_dir(self, dir_path, annotation, sorted_attrs):
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        pattern = re.compile(r'([-\d]+)_c(\d)')

        data = []
        for img_path in img_paths:
            pid, camid = map(int, pattern.search(img_path).groups())
            if pid == -1 or pid == 0:
                continue  # junk images are just ignored
            assert 0 <= pid <= 1501  # pid == 0 means background
            assert 1 <= camid <= 6
            # Attributes are annotated per identity: look up this pid's row and
            # shift the 1-based labels in the .mat file down to start at 0.
            img_index = annotation['image_index'].index(str(pid).zfill(4))
            attrs = np.array([int(annotation[i][img_index]) - 1 for i in sorted_attrs], dtype=np.float32)
            data.append((img_path, attrs))

        return data
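A rough end-to-end usage sketch for the Market loader; the import path and the .train attribute on the base Dataset are assumptions (taken from how the other FastAttr loaders are typically used), and the images plus market_attribute.mat must already sit under datasets/ as described in the docstring:

# Hypothetical package path; adjust to wherever this datasets package lives.
from fastattr.datasets import Market1501Attr

dataset = Market1501Attr(root='datasets')
img_path, attrs = dataset.train[0]   # assumes the base Dataset stores the train split as .train
print(img_path)        # a jpg under bounding_box_train/
print(attrs.shape)     # (27,) float32, matching NUM_CLASSES in the config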