add more datasets support

pull/259/head
liaoxingyu 2020-09-01 16:13:12 +08:00
parent c1dc4919ef
commit 866a196d19
21 changed files with 586 additions and 10 deletions

fastreid/data/build.py

@@ -54,6 +54,7 @@ def build_reid_train_loader(cfg):
         num_workers=num_workers,
         batch_sampler=batch_sampler,
         collate_fn=fast_batch_collator,
+        pin_memory=True,
     )
     return train_loader
 
@@ -77,7 +78,9 @@ def build_reid_test_loader(cfg, dataset_name):
         test_set,
         batch_sampler=batch_sampler,
         num_workers=0,  # save some memory
-        collate_fn=fast_batch_collator)
+        collate_fn=fast_batch_collator,
+        pin_memory=True,
+    )
     return test_loader, len(dataset.query)
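
Both loaders now request pinned (page-locked) host memory. The payoff comes when the host-to-device copy is issued asynchronously; the sketch below is illustrative only (train_loader and model stand in for the objects fast-reid builds, and the batch keys follow the CommDataset change in the next file):

    # Minimal sketch, assuming a CUDA device and the dict batches
    # produced by fast_batch_collator.
    def train_one_epoch(model, train_loader, device="cuda"):
        for batch in train_loader:
            # Tensors collated into pinned buffers can be copied to the
            # GPU asynchronously, overlapping the copy with compute.
            images = batch["images"].to(device, non_blocking=True)
            targets = batch["targets"].to(device, non_blocking=True)
            outputs = model(images)  # forward only; loss/backward omitted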

fastreid/data/common.py

@@ -17,10 +17,17 @@ class CommDataset(Dataset):
         self.transform = transform
         self.relabel = relabel
 
-        pid_set = set([i[1] for i in img_items])
+        pid_set = set()
+        cam_set = set()
+        for i in img_items:
+            pid_set.add(i[1])
+            cam_set.add(i[2])
         self.pids = sorted(list(pid_set))
-        if relabel: self.pid_dict = dict([(p, i) for i, p in enumerate(self.pids)])
+        self.cams = sorted(list(cam_set))
+        if relabel:
+            self.pid_dict = dict([(p, i) for i, p in enumerate(self.pids)])
+            self.cam_dict = dict([(p, i) for i, p in enumerate(self.cams)])
 
     def __len__(self):
         return len(self.img_items)
 
@@ -29,14 +36,20 @@ class CommDataset(Dataset):
         img_path, pid, camid = self.img_items[index]
         img = read_image(img_path)
         if self.transform is not None: img = self.transform(img)
-        if self.relabel: pid = self.pid_dict[pid]
+        if self.relabel:
+            pid = self.pid_dict[pid]
+            camid = self.cam_dict[camid]
         return {
            "images": img,
            "targets": pid,
-           "camid": camid,
-           "img_path": img_path
+           "camids": camid,
+           "img_paths": img_path,
        }
 
     @property
     def num_classes(self):
         return len(self.pids)
+
+    @property
+    def num_cameras(self):
+        return len(self.cams)
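
CommDataset now collects camera ids alongside person ids and, when relabel=True, maps both to contiguous integer indices; the new num_cameras property mirrors num_classes. A standalone illustration of the same mapping, with made-up items:

    # Illustrative only: contiguous relabeling of string ids, as CommDataset does.
    items = [("a.jpg", "duke_7", "duke_1"), ("b.jpg", "duke_2", "duke_0")]
    pids = sorted({pid for _, pid, _ in items})
    cams = sorted({cam for _, _, cam in items})
    pid_dict = {p: i for i, p in enumerate(pids)}  # {'duke_2': 0, 'duke_7': 1}
    cam_dict = {c: i for i, c in enumerate(cams)}  # {'duke_0': 0, 'duke_1': 1}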

fastreid/data/datasets/AirportALERT.py

@@ -0,0 +1,46 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+__all__ = ['AirportALERT', ]
+
+
+@DATASET_REGISTRY.register()
+class AirportALERT(ImageDataset):
+    dataset_dir = "AirportALERT"
+    dataset_name = "airport"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+        self.train_file = os.path.join(self.root, self.dataset_dir, 'filepath.txt')
+
+        required_files = [self.train_file, self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path, self.train_file)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, dir_path, train_file):
+        data = []
+        with open(train_file, "r") as f:
+            img_paths = [line.strip('\n') for line in f.readlines()]
+
+        for path in img_paths:
+            split_path = path.split('\\')
+            img_path = '/'.join(split_path)
+            camid = self.dataset_name + "_" + split_path[0]
+            pid = self.dataset_name + "_" + split_path[1]
+            img_path = os.path.join(dir_path, img_path)
+            if 11001 <= int(split_path[1]) <= 401999:
+                data.append([img_path, pid, camid])
+
+        return data
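
filepath.txt stores Windows-style relative paths, so each line is split on '\\' and rejoined with '/'; the second path component doubles as the person id, and the numeric range check keeps only ids inside the labeled interval. With a made-up line:

    # Hypothetical filepath.txt entry.
    path = "cam1\\11002\\frame_0001.jpg"
    split_path = path.split("\\")                 # ['cam1', '11002', 'frame_0001.jpg']
    img_path = "/".join(split_path)               # 'cam1/11002/frame_0001.jpg'
    keep = 11001 <= int(split_path[1]) <= 401999  # True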

fastreid/data/datasets/__init__.py

@@ -12,10 +12,24 @@ Registry for datasets
 It must returns an instance of :class:`Backbone`.
 """
 
+# Person re-id datasets
 from .cuhk03 import CUHK03
 from .dukemtmcreid import DukeMTMC
 from .market1501 import Market1501
 from .msmt17 import MSMT17
+from .AirportALERT import AirportALERT
+from .iLIDS import iLIDS
+from .pku import PKU
+from .prai import PRAI
+from .sensereid import SenseReID
+from .sysu_mm import SYSU_mm
+from .thermalworld import Thermalworld
+from .pes3d import PeS3D
+from .caviara import CAVIARa
+from .viper import VIPeR
+from .lpw import LPW
+from .shinpuhkan import Shinpuhkan
+# Vehicle re-id datasets
 from .veri import VeRi
 from .vehicleid import VehicleID, SmallVehicleID, MediumVehicleID, LargeVehicleID
 from .veriwild import VeRiWild, SmallVeRiWild, MediumVeRiWild, LargeVeRiWild
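
These imports are what fire each @DATASET_REGISTRY.register() decorator, so a dataset becomes reachable by class name as soon as this package is imported. A hedged sketch of the lookup (the registry follows fvcore-style semantics; the exact call site inside fast-reid may differ):

    from fastreid.data.datasets import DATASET_REGISTRY

    # Instantiate by class name; the __init__ signatures above default root
    # to 'datasets'.
    dataset = DATASET_REGISTRY.get("PKU")(root="datasets")
    # dataset.train holds the (img_path, pid, camid) entries built by process_train.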

fastreid/data/datasets/caviara.py

@@ -0,0 +1,46 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from scipy.io import loadmat
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+import pdb
+import random
+import numpy as np
+
+__all__ = ['CAVIARa', ]
+
+
+@DATASET_REGISTRY.register()
+class CAVIARa(ImageDataset):
+    dataset_dir = "CAVIARa"
+    dataset_name = "caviara"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+        img_list = glob(os.path.join(train_path, "*.jpg"))
+        for img_path in img_list:
+            img_name = img_path.split('/')[-1]
+            pid = self.dataset_name + "_" + img_name[:4]
+            camid = self.dataset_name + "_cam0"
+            data.append([img_path, pid, camid])
+
+        return data

fastreid/data/datasets/cuhk03.py

@@ -76,7 +76,8 @@ class CUHK03(ImageDataset):
             tmp_train = []
             for img_path, pid, camid in train:
                 new_pid = self.dataset_name + "_" + str(pid)
-                tmp_train.append((img_path, new_pid, camid))
+                new_camid = self.dataset_name + "_" + str(camid)
+                tmp_train.append((img_path, new_pid, new_camid))
             train = tmp_train
             del tmp_train
             query = split['query']
fastreid/data/datasets/dukemtmcreid.py

@@ -64,6 +64,7 @@ class DukeMTMC(ImageDataset):
             camid -= 1  # index starts from 0
             if is_train:
                 pid = self.dataset_name + "_" + str(pid)
+                camid = self.dataset_name + "_" + str(camid)
             data.append((img_path, pid, camid))
 
         return data
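
This hunk, and the matching ones in cuhk03.py above and market1501.py, msmt17.py and veri.py below, prefix the camera id with dataset_name just as the person id already was. The point is global uniqueness when several datasets are concatenated for joint training: string keys from different sets can no longer collide before CommDataset relabels them. With illustrative values:

    # Without the prefix, pid 5 from DukeMTMC and pid 5 from Market1501
    # would collapse into one class after concatenation.
    pid, camid = 5, 3
    pid = "dukemtmc" + "_" + str(pid)      # 'dukemtmc_5'
    camid = "dukemtmc" + "_" + str(camid)  # 'dukemtmc_3'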

fastreid/data/datasets/iLIDS.py

@@ -0,0 +1,43 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+__all__ = ['iLIDS', ]
+
+
+@DATASET_REGISTRY.register()
+class iLIDS(ImageDataset):
+    dataset_dir = "iLIDS"
+    dataset_name = "ilids"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+        file_path = os.listdir(train_path)
+        for pid_dir in file_path:
+            img_file = os.path.join(train_path, pid_dir)
+            img_paths = glob(os.path.join(img_file, "*.png"))
+            for img_path in img_paths:
+                split_path = img_path.split('/')
+                pid = self.dataset_name + "_" + split_path[-2]
+                camid = self.dataset_name + "_" + split_path[-1].split('_')[0]
+                data.append([img_path, pid, camid])
+        return data

fastreid/data/datasets/lpw.py

@@ -0,0 +1,47 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+__all__ = ['LPW', ]
+
+
+@DATASET_REGISTRY.register()
+class LPW(ImageDataset):
+    dataset_dir = "pep_256x128"
+    dataset_name = "lpw"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+
+        file_path_list = ['scen1', 'scen2', 'scen3']
+
+        for scene in file_path_list:
+            cam_list = os.listdir(os.path.join(train_path, scene))
+            for cam in cam_list:
+                camid = self.dataset_name + "_" + cam
+                pid_list = os.listdir(os.path.join(train_path, scene, cam))
+                for pid_dir in pid_list:
+                    img_paths = glob(os.path.join(train_path, scene, cam, pid_dir, "*.jpg"))
+                    for img_path in img_paths:
+                        pid = self.dataset_name + "_" + scene + "-" + pid_dir
+                        data.append([img_path, pid, camid])
+        return data
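
LPW nests images as scene/cam/pid, and the person id is made unique per scene by joining the scene and pid directory names. Note that camid omits the scene, so camera folders sharing a name across scenes share a camid. With made-up directory names:

    # Resulting labels for pep_256x128/scen1/cam2/34/000001.jpg (hypothetical layout):
    pid = "lpw" + "_" + "scen1" + "-" + "34"  # 'lpw_scen1-34'
    camid = "lpw" + "_" + "cam2"              # 'lpw_cam2'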

fastreid/data/datasets/market1501.py

@@ -84,6 +84,7 @@ class Market1501(ImageDataset):
             camid -= 1  # index starts from 0
             if is_train:
                 pid = self.dataset_name + "_" + str(pid)
+                camid = self.dataset_name + "_" + str(camid)
             data.append((img_path, pid, camid))
 
         return data

fastreid/data/datasets/msmt17.py

@@ -108,6 +108,7 @@ class MSMT17(ImageDataset):
             img_path = osp.join(dir_path, img_path)
             if is_train:
                 pid = self.dataset_name + "_" + str(pid)
+                camid = self.dataset_name + "_" + str(camid)
             data.append((img_path, pid, camid))
 
         return data

fastreid/data/datasets/pes3d.py

@@ -0,0 +1,46 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from scipy.io import loadmat
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+import pdb
+import random
+import numpy as np
+
+__all__ = ['PeS3D', ]
+
+
+@DATASET_REGISTRY.register()
+class PeS3D(ImageDataset):
+    dataset_dir = "3DPeS"
+    dataset_name = "pes3d"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+
+        pid_list = os.listdir(train_path)
+        for pid_dir in pid_list:
+            pid = self.dataset_name + "_" + pid_dir
+            img_list = glob(os.path.join(train_path, pid_dir, "*.bmp"))
+            for img_path in img_list:
+                camid = self.dataset_name + "_cam0"
+                data.append([img_path, pid, camid])
+        return data

fastreid/data/datasets/pku.py

@@ -0,0 +1,42 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+__all__ = ['PKU', ]
+
+
+@DATASET_REGISTRY.register()
+class PKU(ImageDataset):
+    dataset_dir = "PKUv1a_128x48"
+    dataset_name = 'pku'
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+        img_paths = glob(os.path.join(train_path, "*.png"))
+
+        for img_path in img_paths:
+            split_path = img_path.split('/')
+            img_info = split_path[-1].split('_')
+            pid = self.dataset_name + "_" + img_info[0]
+            camid = self.dataset_name + "_" + img_info[1]
+            data.append([img_path, pid, camid])
+        return data

fastreid/data/datasets/prai.py

@@ -0,0 +1,44 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from scipy.io import loadmat
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+import pdb
+
+__all__ = ['PRAI', ]
+
+
+@DATASET_REGISTRY.register()
+class PRAI(ImageDataset):
+    dataset_dir = "PRAI-1581"
+    dataset_name = 'prai'
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir, 'images')
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+        img_paths = glob(os.path.join(train_path, "*.jpg"))
+        for img_path in img_paths:
+            split_path = img_path.split('/')
+            img_info = split_path[-1].split('_')
+            pid = self.dataset_name + "_" + img_info[0]
+            camid = self.dataset_name + "_" + img_info[1]
+            data.append([img_path, pid, camid])
+        return data

fastreid/data/datasets/sensereid.py

@@ -0,0 +1,45 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+__all__ = ['SenseReID', ]
+
+
+@DATASET_REGISTRY.register()
+class SenseReID(ImageDataset):
+    dataset_dir = "SenseReID"
+    dataset_name = "senseid"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+        file_path_list = ['test_gallery', 'test_prob']
+
+        for file_path in file_path_list:
+            sub_file = os.path.join(train_path, file_path)
+            img_name = glob(os.path.join(sub_file, "*.jpg"))
+            for img_path in img_name:
+                img_name = img_path.split('/')[-1]
+                img_info = img_name.split('_')
+                pid = self.dataset_name + "_" + img_info[0]
+                camid = self.dataset_name + "_" + img_info[1].split('.')[0]
+                data.append([img_path, pid, camid])
+        return data

fastreid/data/datasets/shinpuhkan.py

@@ -0,0 +1,46 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+__all__ = ['Shinpuhkan', ]
+
+
+@DATASET_REGISTRY.register()
+class Shinpuhkan(ImageDataset):
+    dataset_dir = "shinpuhkan"
+    dataset_name = 'shinpuhkan'
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+
+        for root, dirs, files in os.walk(train_path):
+            img_names = list(filter(lambda x: x.endswith(".jpg"), files))
+            # fmt: off
+            if len(img_names) == 0: continue
+            # fmt: on
+
+            for img_name in img_names:
+                img_path = os.path.join(root, img_name)
+                split_path = img_name.split('_')
+                pid = self.dataset_name + "_" + split_path[0]
+                camid = self.dataset_name + "_" + split_path[2]
+                data.append((img_path, pid, camid))
+
+        return data

fastreid/data/datasets/sysu_mm.py

@@ -0,0 +1,48 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from scipy.io import loadmat
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+import pdb
+
+__all__ = ['SYSU_mm', ]
+
+
+@DATASET_REGISTRY.register()
+class SYSU_mm(ImageDataset):
+    dataset_dir = "SYSU-MM01"
+    dataset_name = "sysumm01"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+
+        file_path_list = ['cam1', 'cam2', 'cam4', 'cam5']
+
+        for file_path in file_path_list:
+            camid = self.dataset_name + "_" + file_path
+            pid_list = os.listdir(os.path.join(train_path, file_path))
+            for pid_dir in pid_list:
+                pid = self.dataset_name + "_" + pid_dir
+                img_list = glob(os.path.join(train_path, file_path, pid_dir, "*.jpg"))
+                for img_path in img_list:
+                    data.append([img_path, pid, camid])
+        return data

fastreid/data/datasets/thermalworld.py

@@ -0,0 +1,45 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from scipy.io import loadmat
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+import pdb
+import random
+import numpy as np
+
+__all__ = ['Thermalworld', ]
+
+
+@DATASET_REGISTRY.register()
+class Thermalworld(ImageDataset):
+    dataset_dir = "thermalworld_rgb"
+    dataset_name = "thermalworld"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+        pid_list = os.listdir(train_path)
+        for pid_dir in pid_list:
+            pid = self.dataset_name + "_" + pid_dir
+            img_list = glob(os.path.join(train_path, pid_dir, "*.jpg"))
+            for img_path in img_list:
+                camid = self.dataset_name + "_cam0"
+                data.append([img_path, pid, camid])
+        return data

fastreid/data/datasets/veri.py

@@ -62,6 +62,7 @@ class VeRi(ImageDataset):
             camid -= 1  # index starts from 0
             if is_train:
                 pid = self.dataset_name + "_" + str(pid)
+                camid = self.dataset_name + "_" + str(camid)
             data.append((img_path, pid, camid))
 
         return data

fastreid/data/datasets/viper.py

@@ -0,0 +1,45 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+@contact: sherlockliao01@gmail.com
+"""
+
+import os
+from glob import glob
+
+from fastreid.data.datasets import DATASET_REGISTRY
+from fastreid.data.datasets.bases import ImageDataset
+
+__all__ = ['VIPeR', ]
+
+
+@DATASET_REGISTRY.register()
+class VIPeR(ImageDataset):
+    dataset_dir = "VIPeR"
+    dataset_name = "viper"
+
+    def __init__(self, root='datasets', **kwargs):
+        self.root = root
+        self.train_path = os.path.join(self.root, self.dataset_dir)
+
+        required_files = [self.train_path]
+        self.check_before_run(required_files)
+
+        train = self.process_train(self.train_path)
+
+        super().__init__(train, [], [], **kwargs)
+
+    def process_train(self, train_path):
+        data = []
+
+        file_path_list = ['cam_a', 'cam_b']
+
+        for file_path in file_path_list:
+            camid = self.dataset_name + "_" + file_path
+            img_list = glob(os.path.join(train_path, file_path, "*.bmp"))
+            for img_path in img_list:
+                img_name = img_path.split('/')[-1]
+                pid = self.dataset_name + "_" + img_name.split('_')[0]
+                data.append([img_path, pid, camid])
+        return data

fastreid/data/samplers/triplet_sampler.py

@@ -95,7 +95,6 @@ class BalancedIdentitySampler(Sampler):
 
                 if len(ret) == self.batch_size:
                     yield from ret
-                    del ret
                     ret = []
 
@@ -167,5 +166,4 @@ class NaiveIdentitySampler(Sampler):
             assert len(batch_indices) == self.batch_size, f"batch indices have wrong " \
                                                           f"length with {len(batch_indices)}!"
             yield from batch_indices
-            del batch_indices
             batch_indices = []
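
In both samplers the del immediately before rebinding was redundant: rebinding the name to a fresh list already releases the old batch, so only the rebinding is kept. A stripped-down sketch of the emit pattern (illustrative, not fast-reid code):

    # Accumulate indices, flush a full batch, then start a fresh list.
    def emit_batches(indices, batch_size):
        ret = []
        for idx in indices:
            ret.append(idx)
            if len(ret) == batch_size:
                yield from ret
                ret = []  # rebinding alone is enough; no del needed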