# Source: fast-reid/projects/FastAttr/fastattr/datasets/pa100k.py
# (66 lines, 2.0 KiB, Python)

# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
import os.path as osp
import numpy as np
from scipy.io import loadmat
from fastreid.data.datasets import DATASET_REGISTRY
from .bases import Dataset
@DATASET_REGISTRY.register()
class PA100K(Dataset):
    """PA-100K pedestrian attribute dataset.

    Described upstream as 80k training images + 20k test images; this
    loader reads separate train, val and test splits from the annotation
    file.

    Expected layout below ``root``::

        PA-100K/
            data/           # images
            annotation.mat
    """

    dataset_dir = 'PA-100K'

    def __init__(self, root='', **kwargs):
        """Resolve the dataset paths, load all splits and initialise the base.

        Args:
            root: directory that contains the ``PA-100K`` folder.
            **kwargs: forwarded unchanged to the base ``Dataset`` constructor.
        """
        self.root = root
        # Rebind the class-level folder name to the full path on the instance.
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.data_dir = osp.join(self.dataset_dir, "data")
        self.anno_mat_path = osp.join(self.dataset_dir, "annotation.mat")

        # Inherited helper; presumably fails when a path is missing -- verify
        # against the base class.
        self.check_before_run([self.data_dir, self.anno_mat_path])

        train, val, test, attr_dict = self.extract_data()
        super(PA100K, self).__init__(train, val, test, attr_dict=attr_dict, **kwargs)

    def extract_data(self):
        """Read ``annotation.mat`` and build the three splits.

        Returns:
            A ``(train, val, test, attr_dict)`` tuple. Each split is a list of
            ``(img_path, attrs)`` pairs, where ``img_path`` is the image name
            joined onto ``self.data_dir`` and ``attrs`` is that image's label
            row cast to ``np.float32``. ``attr_dict`` maps the attribute index
            to its name as a ``str``.
        """
        # The .mat file is expected to hold the keys 'train_images_name',
        # 'val_images_name', 'test_images_name', 'train_label', 'val_label',
        # 'test_label' and 'attributes'.
        mat = loadmat(self.anno_mat_path)

        def _load_split(name_key, label_key):
            """Pair every image path of one split with its label vector."""
            file_names = mat[name_key]
            label_rows = mat[label_key]
            # Each name cell is itself array-wrapped, hence the extra [0].
            return [
                (
                    osp.join(self.data_dir, file_names[row, 0][0]),
                    label_rows[row, :].astype(np.float32),
                )
                for row in range(file_names.shape[0])
            ]

        split_keys = (
            ('train_images_name', 'train_label'),
            ('val_images_name', 'val_label'),
            ('test_images_name', 'test_label'),
        )
        train, val, test = [_load_split(*keys) for keys in split_keys]

        # Attribute names are nested the same way as file names.
        attr_dict = dict(
            enumerate(str(entry[0][0]) for entry in mat['attributes'])
        )
        return train, val, test, attr_dict