109 lines
4.2 KiB
Python
109 lines
4.2 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import print_function
|
|
from __future__ import division
|
|
|
|
import sys
|
|
import os
|
|
import os.path as osp
|
|
import glob
|
|
import warnings
|
|
|
|
from torchreid.data.datasets import VideoDataset
|
|
from torchreid.utils import read_json, write_json
|
|
|
|
|
|
class DukeMTMCVidReID(VideoDataset):
    """DukeMTMCVidReID

    Reference:
        Wu et al. Exploit the Unknown Gradually: One-Shot Video-Based Person
        Re-Identification by Stepwise Learning. CVPR 2018.

    URL: https://github.com/Yu-Wu/DukeMTMC-VideoReID

    Dataset statistics:
        identities: 702 (train) + 702 (test)
        tracklets: 2196 (train) + 2636 (test)

    Each split (train / query / gallery) is represented as a list of
    ``(img_paths, pid, camid)`` tracklets. A split is built by scanning the
    extracted dataset once and is then cached as a json file inside the
    dataset directory.
    """
    dataset_dir = 'dukemtmc-vidreid'
    dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-VideoReID.zip'

    def __init__(self, root='', min_seq_len=0, **kwargs):
        """Locate the dataset under ``root`` and build the three splits.

        Args:
            root: directory that contains (or will contain) the
                ``dukemtmc-vidreid`` folder. ``~`` is expanded.
            min_seq_len: tracklets with fewer ``.jpg`` files than this are
                dropped when a split is generated from disk.
            **kwargs: forwarded unchanged to ``VideoDataset.__init__``.
        """
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        # NOTE(review): download_dataset / check_before_run are not defined
        # in this class; presumably inherited from VideoDataset -- confirm.
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.train_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/train')
        self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/query')
        self.gallery_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/gallery')
        self.split_train_json_path = osp.join(self.dataset_dir, 'split_train.json')
        self.split_query_json_path = osp.join(self.dataset_dir, 'split_query.json')
        self.split_gallery_json_path = osp.join(self.dataset_dir, 'split_gallery.json')
        self.min_seq_len = min_seq_len

        required_files = [
            self.dataset_dir,
            self.train_dir,
            self.query_dir,
            self.gallery_dir
        ]
        self.check_before_run(required_files)

        # Only the training identities are remapped to contiguous labels.
        train = self.process_dir(self.train_dir, self.split_train_json_path, relabel=True)
        query = self.process_dir(self.query_dir, self.split_query_json_path, relabel=False)
        gallery = self.process_dir(self.gallery_dir, self.split_gallery_json_path, relabel=False)

        super(DukeMTMCVidReID, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path, json_path, relabel):
        """Return the tracklets of one split, generating the cache if needed.

        If ``json_path`` already exists its ``'tracklets'`` entry is returned
        as loaded; in that case ``relabel`` and ``self.min_seq_len`` are NOT
        re-applied, so delete the json file to regenerate a split with
        different settings.

        Otherwise ``dir_path`` is scanned as ``<pid>/<tracklet>/*.jpg``, the
        result is written to ``json_path`` and returned.

        Args:
            dir_path: split directory whose sub-directories are named by
                integer person id.
            json_path: location of the cached split file.
            relabel: if true, person ids are replaced by contiguous labels
                ``0..N-1`` assigned in ascending pid order.

        Returns:
            list of ``(img_paths, pid, camid)`` where ``img_paths`` is a
            tuple of frame paths ordered by frame index and ``camid`` is
            zero-based.
        """
        if osp.exists(json_path):
            split = read_json(json_path)
            return split['tracklets']

        print('=> Generating split json file (** this might take a while **)')
        pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
        print('Processing "{}" with {} person identities'.format(dir_path, len(pdirs)))

        pid_container = {int(osp.basename(pdir)) for pdir in pdirs}
        # Sort before enumerating: glob order is filesystem dependent, and
        # labels should be reproducible across machines.
        pid2label = {pid: label for label, pid in enumerate(sorted(pid_container))}

        tracklets = []
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            if relabel:
                pid = pid2label[pid]

            for tdir in glob.glob(osp.join(pdir, '*')):
                raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
                num_imgs = len(raw_img_paths)

                if num_imgs < self.min_seq_len:
                    continue

                # Collect frames in index order F0001, F0002, ...
                img_paths = []
                for img_idx in range(num_imgs):
                    # some tracklet starts from 0002 instead of 0001
                    img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                    res = glob.glob(osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                    if not res:
                        warnings.warn('Index name {} in {} is missing, skip'.format(img_idx_name, tdir))
                        continue
                    img_paths.append(res[0])

                if not img_paths:
                    # Empty tracklet folder, or none of the expected frame
                    # indices exist: there is no image to read the camera id
                    # from, so drop the tracklet instead of raising
                    # IndexError on img_paths[0].
                    warnings.warn('No usable frames found in {}, skip'.format(tdir))
                    continue

                img_name = osp.basename(img_paths[0])
                if '_' not in img_name:
                    # old naming format: 0001C6F0099X30823.jpg
                    camid = int(img_name[5]) - 1
                else:
                    # new naming format: 0001_C6_F0099_X30823.jpg
                    camid = int(img_name[6]) - 1

                tracklets.append((tuple(img_paths), pid, camid))

        print('Saving split to {}'.format(json_path))
        split_dict = {'tracklets': tracklets}
        write_json(split_dict, json_path)

        return tracklets