deep-person-reid/torchreid/dataset_loader.py

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import os
from PIL import Image
import numpy as np
import os.path as osp
import io

import torch
from torch.utils.data import Dataset


def read_image(img_path):
    """Open the image at ``img_path`` and return it converted to RGB.

    A path that does not exist raises IOError straight away. An IOError
    raised while opening or converting the file is reported on stdout and
    the read is attempted again; the number of attempts is not limited,
    which is meant to ride out errors caused by heavy IO load.
    """
    if not osp.exists(img_path):
        raise IOError('{} does not exist'.format(img_path))

    while True:
        try:
            # Leaving the function here is what ends the retry loop.
            return Image.open(img_path).convert('RGB')
        except IOError:
            print('IOError incurred when reading "{}". Will redo. Don\'t worry. Just chill.'.format(img_path))


class ImageDataset(Dataset):
    """Person re-identification dataset made of single images.

    ``dataset`` is an indexable collection of ``(img_path, pid, camid)``
    records. ``transform``, when given, is applied to every loaded image.
    """

    def __init__(self, dataset, transform=None):
        self.transform = transform
        self.dataset = dataset

    def __len__(self):
        """Return the number of records in the dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(img, pid, camid, img_path)`` for the record at ``index``."""
        record = self.dataset[index]
        img_path, pid, camid = record
        img = read_image(img_path)

        # Without a transform the image is handed back exactly as loaded.
        if self.transform is None:
            return img, pid, camid, img_path

        return self.transform(img), pid, camid, img_path


class VideoDataset(Dataset):
    """Video Person ReID Dataset.

    Every record of ``dataset`` is ``(img_paths, pid, camid)``, where
    ``img_paths`` is the sequence of frame paths of one tracklet.
    Note batch data has shape (batch, seq_len, channel, height, width).

    Args:
        dataset: indexable collection of ``(img_paths, pid, camid)`` records.
        seq_len: number of frames sampled per tracklet (unused by 'all').
        sample_method: one of ``_sample_methods``. It is validated when an
            item is fetched, not at construction time.
        transform: callable applied to every loaded frame. The frames are
            combined with ``torch.stack``, so the transform is expected to
            return tensors of one common shape.
    """
    _sample_methods = ['evenly', 'random', 'all']

    def __init__(self, dataset, seq_len=15, sample_method='evenly', transform=None):
        self.dataset = dataset
        self.seq_len = seq_len
        self.sample_method = sample_method
        self.transform = transform

    def __len__(self):
        """Return the number of tracklets."""
        return len(self.dataset)

    def _sample_indices(self, num):
        """Return the frame indices to load from a tracklet of ``num`` frames.

        Raises:
            ValueError: if ``self.sample_method`` is not a known method.
        """
        if self.sample_method == 'random':
            # Randomly sample seq_len items from num items; frames are only
            # repeated (replace=True) when the tracklet is shorter than seq_len.
            picked = np.random.choice(
                num, size=self.seq_len, replace=num < self.seq_len
            )
            # sort indices to keep temporal order (skip it to be order-agnostic)
            return np.sort(picked)

        if self.sample_method == 'evenly':
            if num >= self.seq_len:
                # Drop the trailing remainder so the stride is an exact
                # integer and np.arange yields precisely seq_len indices.
                usable = num - num % self.seq_len
                indices = np.arange(0, usable, usable // self.seq_len)
            else:
                # if num is smaller than seq_len, simply replicate the last
                # image until the seq_len requirement is satisfied
                padding = np.full(self.seq_len - num, num - 1, dtype=np.int64)
                indices = np.concatenate([np.arange(num), padding])
            assert len(indices) == self.seq_len
            return indices

        if self.sample_method == 'all':
            # Sample all items, seq_len is useless now and batch_size needs
            # to be set to 1.
            return np.arange(num)

        raise ValueError('Unknown sample method: {}. Expected one of {}'.format(self.sample_method, self._sample_methods))

    def __getitem__(self, index):
        """Return ``(imgs, pid, camid)`` for the tracklet at ``index``.

        ``imgs`` stacks the sampled, transformed frames along a new leading
        dimension.

        Raises:
            ValueError: if the tracklet has no frames or the sample method
                is unknown.
        """
        img_paths, pid, camid = self.dataset[index]
        num = len(img_paths)
        if num == 0:
            # Fail early with a clear message instead of an obscure numpy /
            # torch / IndexError raised further down.
            raise ValueError('Tracklet at index {} contains no images'.format(index))

        frames = []
        for frame_idx in self._sample_indices(num):
            img = read_image(img_paths[int(frame_idx)])
            if self.transform is not None:
                img = self.transform(img)
            frames.append(img)
        imgs = torch.stack(frames, dim=0)

        return imgs, pid, camid
standardize code 2018-07-04 10:32:43 +01:00			`from __future__ import absolute_import`
			`from __future__ import print_function`
			`from __future__ import division`
update model & script 2018-07-02 10:17:14 +01:00
first commit 2018-03-11 21:17:48 +00:00			`import os`
			`from PIL import Image`
update readme & dataset_loader 2018-03-12 18:38:12 +00:00			`import numpy as np`
update read_image 2018-05-01 16:30:10 +01:00			`import os.path as osp`
incorporate lmdb 2018-07-02 12:54:45 +01:00			`import io`
first commit 2018-03-11 21:17:48 +00:00
update dataset_loader 2018-03-12 18:39:28 +00:00			`import torch`
first commit 2018-03-11 21:17:48 +00:00			`from torch.utils.data import Dataset`

update model & script 2018-07-02 10:17:14 +01:00
first commit 2018-03-11 21:17:48 +00:00			`def read_image(img_path):`
			`"""Keep reading image until succeed.`
			`This can avoid IOError incurred by heavy IO process."""`
			`got_img = False`
update read_image 2018-05-01 16:30:10 +01:00			`if not osp.exists(img_path):`
update print 2019-01-30 22:41:47 +00:00			`raise IOError('{} does not exist'.format(img_path))`
first commit 2018-03-11 21:17:48 +00:00			`while not got_img:`
			`try:`
			`img = Image.open(img_path).convert('RGB')`
			`got_img = True`
			`except IOError:`
update print 2019-01-30 22:41:47 +00:00			`print('IOError incurred when reading "{}". Will redo. Don\'t worry. Just chill.'.format(img_path))`
first commit 2018-03-11 21:17:48 +00:00			`pass`
			`return img`

update model & script 2018-07-02 10:17:14 +01:00
first commit 2018-03-11 21:17:48 +00:00			`class ImageDataset(Dataset):`
			`"""Image Person ReID Dataset"""`
rm lmdb 2018-08-11 22:22:48 +01:00			`def __init__(self, dataset, transform=None):`
first commit 2018-03-11 21:17:48 +00:00			`self.dataset = dataset`
			`self.transform = transform`

			`def __len__(self):`
			`return len(self.dataset)`

			`def __getitem__(self, index):`
			`img_path, pid, camid = self.dataset[index]`
rm lmdb 2018-08-11 22:22:48 +01:00			`img = read_image(img_path)`
incorporate lmdb 2018-07-02 12:54:45 +01:00
first commit 2018-03-11 21:17:48 +00:00			`if self.transform is not None:`
			`img = self.transform(img)`
incorporate lmdb 2018-07-02 12:54:45 +01:00
return (imgs, pids, camids, img_paths) for image dataloaders 2018-11-10 11:54:06 +00:00			`return img, pid, camid, img_path`
first commit 2018-03-11 21:17:48 +00:00
update model & script 2018-07-02 10:17:14 +01:00
first commit 2018-03-11 21:17:48 +00:00			`class VideoDataset(Dataset):`
			`"""Video Person ReID Dataset.`
			`Note batch data has shape (batch, seq_len, channel, height, width).`
			`"""`
sample -> sample_method 2018-11-08 21:40:44 +00:00			`_sample_methods = ['evenly', 'random', 'all']`
first commit 2018-03-11 21:17:48 +00:00
sample -> sample_method 2018-11-08 21:40:44 +00:00			`def __init__(self, dataset, seq_len=15, sample_method='evenly', transform=None):`
first commit 2018-03-11 21:17:48 +00:00			`self.dataset = dataset`
			`self.seq_len = seq_len`
sample -> sample_method 2018-11-08 21:40:44 +00:00			`self.sample_method = sample_method`
first commit 2018-03-11 21:17:48 +00:00			`self.transform = transform`

			`def __len__(self):`
			`return len(self.dataset)`

			`def __getitem__(self, index):`
			`img_paths, pid, camid = self.dataset[index]`
			`num = len(img_paths)`

sample -> sample_method 2018-11-08 21:40:44 +00:00			`if self.sample_method == 'random':`
first commit 2018-03-11 21:17:48 +00:00			`"""`
			`Randomly sample seq_len items from num items,`
			`if num is smaller than seq_len, then replicate items`
			`"""`
			`indices = np.arange(num)`
			`replace = False if num >= self.seq_len else True`
			`indices = np.random.choice(indices, size=self.seq_len, replace=replace)`
update model & script 2018-07-02 10:17:14 +01:00			`# sort indices to keep temporal order (comment it to be order-agnostic)`
first commit 2018-03-11 21:17:48 +00:00			`indices = np.sort(indices)`
sample -> sample_method 2018-11-08 21:40:44 +00:00
			`elif self.sample_method == 'evenly':`
update model & script 2018-07-02 10:17:14 +01:00			`"""`
			`Evenly sample seq_len items from num items.`
			`"""`
first commit 2018-03-11 21:17:48 +00:00			`if num >= self.seq_len:`
			`num -= num % self.seq_len`
			`indices = np.arange(0, num, num/self.seq_len)`
			`else:`
			`# if num is smaller than seq_len, simply replicate the last image`
			`# until the seq_len requirement is satisfied`
			`indices = np.arange(0, num)`
			`num_pads = self.seq_len - num`
			`indices = np.concatenate([indices, np.ones(num_pads).astype(np.int32)*(num-1)])`
			`assert len(indices) == self.seq_len`
sample -> sample_method 2018-11-08 21:40:44 +00:00
			`elif self.sample_method == 'all':`
first commit 2018-03-11 21:17:48 +00:00			`"""`
			`Sample all items, seq_len is useless now and batch_size needs`
			`to be set to 1.`
			`"""`
			`indices = np.arange(num)`
sample -> sample_method 2018-11-08 21:40:44 +00:00
first commit 2018-03-11 21:17:48 +00:00			`else:`
update print 2019-01-30 22:41:47 +00:00			`raise ValueError('Unknown sample method: {}. Expected one of {}'.format(self.sample_method, self._sample_methods))`
first commit 2018-03-11 21:17:48 +00:00
			`imgs = []`
			`for index in indices:`
add lmdb generator 2018-07-02 11:57:01 +01:00			`img_path = img_paths[int(index)]`
first commit 2018-03-11 21:17:48 +00:00			`img = read_image(img_path)`
			`if self.transform is not None:`
			`img = self.transform(img)`
			`img = img.unsqueeze(0)`
			`imgs.append(img)`
			`imgs = torch.cat(imgs, dim=0)`

update print 2019-01-30 22:41:47 +00:00			`return imgs, pid, camid`