diff --git a/fastreid/data/build.py b/fastreid/data/build.py index 92202b2..a073f94 100644 --- a/fastreid/data/build.py +++ b/fastreid/data/build.py @@ -54,6 +54,7 @@ def build_reid_train_loader(cfg): num_workers=num_workers, batch_sampler=batch_sampler, collate_fn=fast_batch_collator, + pin_memory=True, ) return train_loader @@ -61,7 +62,7 @@ def build_reid_train_loader(cfg): def build_reid_test_loader(cfg, dataset_name): cfg = cfg.clone() cfg.defrost() - + dataset = DATASET_REGISTRY.get(dataset_name)(root=_root) if comm.is_main_process(): dataset.show_test() @@ -77,7 +78,9 @@ def build_reid_test_loader(cfg, dataset_name): test_set, batch_sampler=batch_sampler, num_workers=0, # save some memory - collate_fn=fast_batch_collator) + collate_fn=fast_batch_collator, + pin_memory=True, + ) return test_loader, len(dataset.query) diff --git a/fastreid/data/common.py b/fastreid/data/common.py index 959fefb..82feb3b 100644 --- a/fastreid/data/common.py +++ b/fastreid/data/common.py @@ -17,10 +17,17 @@ class CommDataset(Dataset): self.transform = transform self.relabel = relabel - pid_set = set([i[1] for i in img_items]) + pid_set = set() + cam_set = set() + for i in img_items: + pid_set.add(i[1]) + cam_set.add(i[2]) self.pids = sorted(list(pid_set)) - if relabel: self.pid_dict = dict([(p, i) for i, p in enumerate(self.pids)]) + self.cams = sorted(list(cam_set)) + if relabel: + self.pid_dict = dict([(p, i) for i, p in enumerate(self.pids)]) + self.cam_dict = dict([(p, i) for i, p in enumerate(self.cams)]) def __len__(self): return len(self.img_items) @@ -29,14 +36,20 @@ class CommDataset(Dataset): img_path, pid, camid = self.img_items[index] img = read_image(img_path) if self.transform is not None: img = self.transform(img) - if self.relabel: pid = self.pid_dict[pid] + if self.relabel: + pid = self.pid_dict[pid] + camid = self.cam_dict[camid] return { "images": img, "targets": pid, - "camid": camid, - "img_path": img_path + "camids": camid, + "img_paths": img_path, } @property def num_classes(self): return len(self.pids) + + @property + def num_cameras(self): + return len(self.cams) diff --git a/fastreid/data/datasets/AirportALERT.py b/fastreid/data/datasets/AirportALERT.py new file mode 100644 index 0000000..0ffece6 --- /dev/null +++ b/fastreid/data/datasets/AirportALERT.py @@ -0,0 +1,46 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset + +__all__ = ['AirportALERT', ] + + +@DATASET_REGISTRY.register() +class AirportALERT(ImageDataset): + dataset_dir = "AirportALERT" + dataset_name = "airport" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + self.train_file = os.path.join(self.root, self.dataset_dir, 'filepath.txt') + + required_files = [self.train_file, self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path, self.train_file) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, dir_path, train_file): + data = [] + with open(train_file, "r") as f: + img_paths = [line.strip('\n') for line in f.readlines()] + + for path in img_paths: + split_path = path.split('\\') + img_path = '/'.join(split_path) + camid = self.dataset_name + "_" + split_path[0] + pid = self.dataset_name + "_" + split_path[1] + img_path = os.path.join(dir_path, img_path) + if 11001 <= int(split_path[1]) <= 401999: + data.append([img_path, pid, camid]) + + return data diff --git a/fastreid/data/datasets/__init__.py b/fastreid/data/datasets/__init__.py index 8518cb9..80e7467 100644 --- a/fastreid/data/datasets/__init__.py +++ b/fastreid/data/datasets/__init__.py @@ -12,10 +12,24 @@ Registry for datasets It must returns an instance of :class:`Backbone`. """ +# Person re-id datasets from .cuhk03 import CUHK03 from .dukemtmcreid import DukeMTMC from .market1501 import Market1501 from .msmt17 import MSMT17 +from .AirportALERT import AirportALERT +from .iLIDS import iLIDS +from .pku import PKU +from .prai import PRAI +from .sensereid import SenseReID +from .sysu_mm import SYSU_mm +from .thermalworld import Thermalworld +from .pes3d import PeS3D +from .caviara import CAVIARa +from .viper import VIPeR +from .lpw import LPW +from .shinpuhkan import Shinpuhkan +# Vehicle re-id datasets from .veri import VeRi from .vehicleid import VehicleID, SmallVehicleID, MediumVehicleID, LargeVehicleID from .veriwild import VeRiWild, SmallVeRiWild, MediumVeRiWild, LargeVeRiWild diff --git a/fastreid/data/datasets/caviara.py b/fastreid/data/datasets/caviara.py new file mode 100644 index 0000000..9403671 --- /dev/null +++ b/fastreid/data/datasets/caviara.py @@ -0,0 +1,46 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from scipy.io import loadmat +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset +import pdb +import random +import numpy as np + +__all__ = ['CAVIARa',] + + +@DATASET_REGISTRY.register() +class CAVIARa(ImageDataset): + dataset_dir = "CAVIARa" + dataset_name = "caviara" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + + img_list = glob(os.path.join(train_path, "*.jpg")) + for img_path in img_list: + img_name = img_path.split('/')[-1] + pid = self.dataset_name + "_" + img_name[:4] + camid = self.dataset_name + "_cam0" + data.append([img_path, pid, camid]) + + return data diff --git a/fastreid/data/datasets/cuhk03.py b/fastreid/data/datasets/cuhk03.py index 5c92fc8..96fdaa5 100644 --- a/fastreid/data/datasets/cuhk03.py +++ b/fastreid/data/datasets/cuhk03.py @@ -76,7 +76,8 @@ class CUHK03(ImageDataset): tmp_train = [] for img_path, pid, camid in train: new_pid = self.dataset_name + "_" + str(pid) - tmp_train.append((img_path, new_pid, camid)) + new_camid = self.dataset_name + "_" + str(camid) + tmp_train.append((img_path, new_pid, new_camid)) train = tmp_train del tmp_train query = split['query'] diff --git a/fastreid/data/datasets/dukemtmcreid.py b/fastreid/data/datasets/dukemtmcreid.py index 6e83d80..cd90ce2 100644 --- a/fastreid/data/datasets/dukemtmcreid.py +++ b/fastreid/data/datasets/dukemtmcreid.py @@ -64,6 +64,7 @@ class DukeMTMC(ImageDataset): camid -= 1 # index starts from 0 if is_train: pid = self.dataset_name + "_" + str(pid) + camid = self.dataset_name + "_" + str(camid) data.append((img_path, pid, camid)) return data diff --git a/fastreid/data/datasets/iLIDS.py b/fastreid/data/datasets/iLIDS.py new file mode 100644 index 0000000..9d9a412 --- /dev/null +++ b/fastreid/data/datasets/iLIDS.py @@ -0,0 +1,43 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset + +__all__ = ['iLIDS', ] + + +@DATASET_REGISTRY.register() +class iLIDS(ImageDataset): + dataset_dir = "iLIDS" + dataset_name = "ilids" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + file_path = os.listdir(train_path) + for pid_dir in file_path: + img_file = os.path.join(train_path, pid_dir) + img_paths = glob(os.path.join(img_file, "*.png")) + for img_path in img_paths: + split_path = img_path.split('/') + pid = self.dataset_name + "_" + split_path[-2] + camid = self.dataset_name + "_" + split_path[-1].split('_')[0] + data.append([img_path, pid, camid]) + return data diff --git a/fastreid/data/datasets/lpw.py b/fastreid/data/datasets/lpw.py new file mode 100644 index 0000000..a7f6117 --- /dev/null +++ b/fastreid/data/datasets/lpw.py @@ -0,0 +1,47 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset + +__all__ = ['LPW', ] + + +@DATASET_REGISTRY.register() +class LPW(ImageDataset): + dataset_dir = "pep_256x128" + dataset_name = "lpw" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + + file_path_list = ['scen1', 'scen2', 'scen3'] + + for scene in file_path_list: + cam_list = os.listdir(os.path.join(train_path, scene)) + for cam in cam_list: + camid = self.dataset_name + "_" + cam + pid_list = os.listdir(os.path.join(train_path, scene, cam)) + for pid_dir in pid_list: + img_paths = glob(os.path.join(train_path, scene, cam, pid_dir, "*.jpg")) + for img_path in img_paths: + pid = self.dataset_name + "_" + scene + "-" + pid_dir + data.append([img_path, pid, camid]) + return data diff --git a/fastreid/data/datasets/market1501.py b/fastreid/data/datasets/market1501.py index 9460533..d1968af 100644 --- a/fastreid/data/datasets/market1501.py +++ b/fastreid/data/datasets/market1501.py @@ -84,6 +84,7 @@ class Market1501(ImageDataset): camid -= 1 # index starts from 0 if is_train: pid = self.dataset_name + "_" + str(pid) + camid = self.dataset_name + "_" + str(camid) data.append((img_path, pid, camid)) return data diff --git a/fastreid/data/datasets/msmt17.py b/fastreid/data/datasets/msmt17.py index 85ea9be..3d77d2b 100644 --- a/fastreid/data/datasets/msmt17.py +++ b/fastreid/data/datasets/msmt17.py @@ -108,6 +108,7 @@ class MSMT17(ImageDataset): img_path = osp.join(dir_path, img_path) if is_train: pid = self.dataset_name + "_" + str(pid) + camid = self.dataset_name + "_" + str(camid) data.append((img_path, pid, camid)) return data diff --git a/fastreid/data/datasets/pes3d.py b/fastreid/data/datasets/pes3d.py new file mode 100644 index 0000000..4b4897e --- /dev/null +++ b/fastreid/data/datasets/pes3d.py @@ -0,0 +1,46 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from scipy.io import loadmat +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset +import pdb +import random +import numpy as np + +__all__ = ['PeS3D',] + + +@DATASET_REGISTRY.register() +class PeS3D(ImageDataset): + dataset_dir = "3DPeS" + dataset_name = "pes3d" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + + pid_list = os.listdir(train_path) + for pid_dir in pid_list: + pid = self.dataset_name + "_" + pid_dir + img_list = glob(os.path.join(train_path, pid_dir, "*.bmp")) + for img_path in img_list: + camid = self.dataset_name + "_cam0" + data.append([img_path, pid, camid]) + return data diff --git a/fastreid/data/datasets/pku.py b/fastreid/data/datasets/pku.py new file mode 100644 index 0000000..0082d29 --- /dev/null +++ b/fastreid/data/datasets/pku.py @@ -0,0 +1,42 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset + +__all__ = ['PKU', ] + + +@DATASET_REGISTRY.register() +class PKU(ImageDataset): + dataset_dir = "PKUv1a_128x48" + dataset_name = 'pku' + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + img_paths = glob(os.path.join(train_path, "*.png")) + + for img_path in img_paths: + split_path = img_path.split('/') + img_info = split_path[-1].split('_') + pid = self.dataset_name + "_" + img_info[0] + camid = self.dataset_name + "_" + img_info[1] + data.append([img_path, pid, camid]) + return data diff --git a/fastreid/data/datasets/prai.py b/fastreid/data/datasets/prai.py new file mode 100644 index 0000000..42a0223 --- /dev/null +++ b/fastreid/data/datasets/prai.py @@ -0,0 +1,44 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from scipy.io import loadmat +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset +import pdb + +__all__ = ['PRAI',] + + +@DATASET_REGISTRY.register() +class PRAI(ImageDataset): + dataset_dir = "PRAI-1581" + dataset_name = 'prai' + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir, 'images') + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + img_paths = glob(os.path.join(train_path, "*.jpg")) + for img_path in img_paths: + split_path = img_path.split('/') + img_info = split_path[-1].split('_') + pid = self.dataset_name + "_" + img_info[0] + camid = self.dataset_name + "_" + img_info[1] + data.append([img_path, pid, camid]) + return data + diff --git a/fastreid/data/datasets/sensereid.py b/fastreid/data/datasets/sensereid.py new file mode 100644 index 0000000..92d2c16 --- /dev/null +++ b/fastreid/data/datasets/sensereid.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset + +__all__ = ['SenseReID', ] + + +@DATASET_REGISTRY.register() +class SenseReID(ImageDataset): + dataset_dir = "SenseReID" + dataset_name = "senseid" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + file_path_list = ['test_gallery', 'test_prob'] + + for file_path in file_path_list: + sub_file = os.path.join(train_path, file_path) + img_name = glob(os.path.join(sub_file, "*.jpg")) + for img_path in img_name: + img_name = img_path.split('/')[-1] + img_info = img_name.split('_') + pid = self.dataset_name + "_" + img_info[0] + camid = self.dataset_name + "_" + img_info[1].split('.')[0] + data.append([img_path, pid, camid]) + return data diff --git a/fastreid/data/datasets/shinpuhkan.py b/fastreid/data/datasets/shinpuhkan.py new file mode 100644 index 0000000..ae240c1 --- /dev/null +++ b/fastreid/data/datasets/shinpuhkan.py @@ -0,0 +1,46 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset + +__all__ = ['Shinpuhkan', ] + + +@DATASET_REGISTRY.register() +class Shinpuhkan(ImageDataset): + dataset_dir = "shinpuhkan" + dataset_name = 'shinpuhkan' + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + + for root, dirs, files in os.walk(train_path): + img_names = list(filter(lambda x: x.endswith(".jpg"), files)) + # fmt: off + if len(img_names) == 0: continue + # fmt: on + for img_name in img_names: + img_path = os.path.join(root, img_name) + split_path = img_name.split('_') + pid = self.dataset_name + "_" + split_path[0] + camid = self.dataset_name + "_" + split_path[2] + data.append((img_path, pid, camid)) + + return data diff --git a/fastreid/data/datasets/sysu_mm.py b/fastreid/data/datasets/sysu_mm.py new file mode 100644 index 0000000..bf67fff --- /dev/null +++ b/fastreid/data/datasets/sysu_mm.py @@ -0,0 +1,48 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from scipy.io import loadmat +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset +import pdb + +__all__ = ['SYSU_mm', ] + + +@DATASET_REGISTRY.register() +class SYSU_mm(ImageDataset): + dataset_dir = "SYSU-MM01" + dataset_name = "sysumm01" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + + file_path_list = ['cam1', 'cam2', 'cam4', 'cam5'] + + for file_path in file_path_list: + camid = self.dataset_name + "_" + file_path + pid_list = os.listdir(os.path.join(train_path, file_path)) + for pid_dir in pid_list: + pid = self.dataset_name + "_" + pid_dir + img_list = glob(os.path.join(train_path, file_path, pid_dir, "*.jpg")) + for img_path in img_list: + data.append([img_path, pid, camid]) + return data + diff --git a/fastreid/data/datasets/thermalworld.py b/fastreid/data/datasets/thermalworld.py new file mode 100644 index 0000000..b6df954 --- /dev/null +++ b/fastreid/data/datasets/thermalworld.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from scipy.io import loadmat +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset +import pdb +import random +import numpy as np + +__all__ = ['Thermalworld',] + + +@DATASET_REGISTRY.register() +class Thermalworld(ImageDataset): + dataset_dir = "thermalworld_rgb" + dataset_name = "thermalworld" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + pid_list = os.listdir(train_path) + for pid_dir in pid_list: + pid = self.dataset_name + "_" + pid_dir + img_list = glob(os.path.join(train_path, pid_dir, "*.jpg")) + for img_path in img_list: + camid = self.dataset_name + "_cam0" + data.append([img_path, pid, camid]) + return data diff --git a/fastreid/data/datasets/veri.py b/fastreid/data/datasets/veri.py index 7e3b166..b585c1e 100644 --- a/fastreid/data/datasets/veri.py +++ b/fastreid/data/datasets/veri.py @@ -62,6 +62,7 @@ class VeRi(ImageDataset): camid -= 1 # index starts from 0 if is_train: pid = self.dataset_name + "_" + str(pid) + camid = self.dataset_name + "_" + str(camid) data.append((img_path, pid, camid)) return data diff --git a/fastreid/data/datasets/viper.py b/fastreid/data/datasets/viper.py new file mode 100644 index 0000000..730264f --- /dev/null +++ b/fastreid/data/datasets/viper.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +""" +@author: xingyu liao +@contact: sherlockliao01@gmail.com +""" + +import os +from glob import glob + +from fastreid.data.datasets import DATASET_REGISTRY +from fastreid.data.datasets.bases import ImageDataset + +__all__ = ['VIPeR', ] + + +@DATASET_REGISTRY.register() +class VIPeR(ImageDataset): + dataset_dir = "VIPeR" + dataset_name = "viper" + + def __init__(self, root='datasets', **kwargs): + self.root = root + self.train_path = os.path.join(self.root, self.dataset_dir) + + required_files = [self.train_path] + self.check_before_run(required_files) + + train = self.process_train(self.train_path) + + super().__init__(train, [], [], **kwargs) + + def process_train(self, train_path): + data = [] + + file_path_list = ['cam_a', 'cam_b'] + + for file_path in file_path_list: + camid = self.dataset_name + "_" + file_path + img_list = glob(os.path.join(train_path, file_path, "*.bmp")) + for img_path in img_list: + img_name = img_path.split('/')[-1] + pid = self.dataset_name + "_" + img_name.split('_')[0] + data.append([img_path, pid, camid]) + + return data diff --git a/fastreid/data/samplers/triplet_sampler.py b/fastreid/data/samplers/triplet_sampler.py index 4766ed3..a9bcb34 100644 --- a/fastreid/data/samplers/triplet_sampler.py +++ b/fastreid/data/samplers/triplet_sampler.py @@ -95,7 +95,6 @@ class BalancedIdentitySampler(Sampler): if len(ret) == self.batch_size: yield from ret - del ret ret = [] @@ -167,5 +166,4 @@ class NaiveIdentitySampler(Sampler): assert len(batch_indices) == self.batch_size, f"batch indices have wrong " \ f"length with {len(batch_indices)}!" yield from batch_indices - del batch_indices batch_indices = []