diff --git a/docs/en/user_guides/2_dataset_prepare.md b/docs/en/user_guides/2_dataset_prepare.md index 24b95db67..2816a51f0 100644 --- a/docs/en/user_guides/2_dataset_prepare.md +++ b/docs/en/user_guides/2_dataset_prepare.md @@ -198,6 +198,13 @@ mmsegmentation | │   │   │ └── rles | │ │ │ │ ├──sem_seg_train.json | │ │ │ │ └──sem_seg_val.json +│ ├── nyu +│ │ ├── images +│ │ │ ├── train +│ │ │ ├── test +│ │ ├── annotations +│ │ │ ├── train +│ │ │ ├── test ``` ## Download dataset via MIM @@ -735,3 +742,13 @@ mmsegmentation | │ │ │ │ ├──sem_seg_train.json | │ │ │ │ └──sem_seg_val.json ``` + +## NYU + +- To access the NYU dataset, you can download it from [this link](https://drive.google.com/file/d/1wC-io-14RCIL4XTUrQLk6lBqU2AexLVp/view?usp=share_link) + +- Once the download is complete, you can utilize the [tools/dataset_converters/nyu.py](/tools/dataset_converters/nyu.py) script to extract and organize the data into the required format. Run the following command in your terminal: + + ```bash + python tools/dataset_converters/nyu.py nyu.zip + ``` diff --git a/docs/zh_cn/user_guides/2_dataset_prepare.md b/docs/zh_cn/user_guides/2_dataset_prepare.md index 92fdf0079..5532624be 100644 --- a/docs/zh_cn/user_guides/2_dataset_prepare.md +++ b/docs/zh_cn/user_guides/2_dataset_prepare.md @@ -198,6 +198,13 @@ mmsegmentation | │   │   │ └── rles | │ │ │ │ ├──sem_seg_train.json | │ │ │ │ └──sem_seg_val.json +│ ├── nyu +│ │ ├── images +│ │ │ ├── train +│ │ │ ├── test +│ │ ├── annotations +│ │ │ ├── train +│ │ │ ├── test ``` ## 用 MIM 下载数据集 @@ -731,3 +738,13 @@ mmsegmentation | │ │ │ │ ├──sem_seg_train.json | │ │ │ │ └──sem_seg_val.json ``` + +## NYU + +- 您可以从 [这个链接](https://drive.google.com/file/d/1wC-io-14RCIL4XTUrQLk6lBqU2AexLVp/view?usp=share_link) 下载 NYU 数据集 + +- 下载完成后,您可以使用 [tools/dataset_converters/nyu.py](/tools/dataset_converters/nyu.py) 脚本来解压和组织数据到所需的格式 + + ```bash + python tools/dataset_converters/nyu.py nyu.zip + ``` diff --git a/mmseg/datasets/__init__.py 
@DATASETS.register_module()
class NYUDataset(BaseSegDataset):
    """NYU depth estimation dataset. The file structure should be.

    .. code-block:: none

        ├── data
        │   ├── nyu
        │   │   ├── images
        │   │   │   ├── train
        │   │   │   │   ├── scene_xxx.jpg
        │   │   │   │   ├── ...
        │   │   │   ├── test
        │   │   ├── annotations
        │   │   │   ├── train
        │   │   │   │   ├── scene_xxx.png
        │   │   │   │   ├── ...
        │   │   │   ├── test

    Args:
        ann_file (str): Annotation file path. Defaults to ''.
        metainfo (dict, optional): Meta information for dataset, such as
            specify classes to load. Defaults to None.
        data_root (str, optional): The root directory for ``data_prefix`` and
            ``ann_file``. Defaults to None.
        data_prefix (dict, optional): Prefix for training data. Defaults to
            dict(img_path='images', depth_map_path='annotations').
        img_suffix (str): Suffix of images. Default: '.jpg'
        depth_map_suffix (str): Suffix of depth maps. Default: '.png'.
            It is handed to the base class as ``seg_map_suffix``.
        filter_cfg (dict, optional): Config for filter data. Defaults to None.
        indices (int or Sequence[int], optional): Support using first few
            data in annotation file to facilitate training/testing on a smaller
            dataset. Defaults to None which means using all ``data_infos``.
        serialize_data (bool, optional): Whether to hold memory using
            serialized objects, when enabled, data loader workers can use
            shared RAM from master process instead of making a copy. Defaults
            to True.
        pipeline (list, optional): Processing pipeline. Defaults to [].
        test_mode (bool, optional): ``test_mode=True`` means in test phase.
            Defaults to False.
        lazy_init (bool, optional): Whether to load annotation during
            instantiation. In some cases, such as visualization, only the meta
            information of the dataset is needed, which is not necessary to
            load annotation file. ``Basedataset`` can skip load annotations to
            save time by set ``lazy_init=True``. Defaults to False.
        max_refetch (int, optional): If ``Basedataset.prepare_data`` get a
            None img. The maximum extra number of cycles to get a valid
            image. Defaults to 1000.
        ignore_index (int): The label index to be ignored. Default: 255
        reduce_zero_label (bool): Whether to mark label zero as ignored.
            Default to False.
        backend_args (dict, Optional): Arguments to instantiate a file backend.
            See https://mmengine.readthedocs.io/en/latest/api/fileio.html
            for details. Defaults to None.
            Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
    """
    # Scene names are matched verbatim against file name prefixes, so the
    # spelling of every entry (including 'excercise_room') must stay as is.
    METAINFO = dict(
        classes=('printer_room', 'bathroom', 'living_room', 'study',
                 'conference_room', 'study_room', 'kitchen', 'home_office',
                 'bedroom', 'dinette', 'playroom', 'indoor_balcony',
                 'laundry_room', 'basement', 'excercise_room', 'foyer',
                 'home_storage', 'cafe', 'furniture_store', 'office_kitchen',
                 'student_lounge', 'dining_room', 'reception_room',
                 'computer_lab', 'classroom', 'office', 'bookstore'))

    def __init__(self,
                 data_prefix=dict(
                     img_path='images', depth_map_path='annotations'),
                 img_suffix='.jpg',
                 depth_map_suffix='.png',
                 **kwargs) -> None:
        # The base class only knows ``seg_map_suffix``; the depth map suffix
        # is stored under that name and read back in ``load_data_list``.
        super().__init__(
            data_prefix=data_prefix,
            img_suffix=img_suffix,
            seg_map_suffix=depth_map_suffix,
            **kwargs)

    def _get_category_id_from_filename(self, image_fname: str) -> int:
        """Retrieve the category ID from the given image filename.

        The scene name is the part of the basename in front of the first
        digit, without the separating underscore, e.g.
        ``bookstore_0001d_00001.jpg`` gives ``bookstore``.

        Args:
            image_fname (str): Image file name or path.

        Returns:
            int: Index of the scene name in ``classes``, or -1 when the name
            is unknown or the file name contains no digit at all.
        """
        image_fname = osp.basename(image_fname)
        # A default is required here: a bare ``next()`` would raise
        # ``StopIteration`` for file names without any digit.
        first_digit = next(
            (idx for idx, char in enumerate(image_fname) if char.isdigit()),
            None)
        if first_digit is None:
            return -1
        category_name = image_fname[:first_digit].rstrip('_')
        classes = self._metainfo['classes']
        if category_name not in classes:
            return -1
        return classes.index(category_name)

    def load_data_list(self) -> List[dict]:
        """Load annotation from directory or annotation file.

        Every image found recursively below ``data_prefix['img_path']`` with
        suffix ``img_suffix`` becomes one entry. When a depth map prefix is
        configured, the matching depth map path is derived by swapping the
        image suffix for the depth map suffix.

        Returns:
            list[dict]: All data info of dataset, sorted by ``img_path``.
        """
        data_list = []
        img_dir = self.data_prefix.get('img_path', None)
        ann_dir = self.data_prefix.get('depth_map_path', None)

        suffix_len = len(self.img_suffix)
        for img in fileio.list_dir_or_file(
                dir_path=img_dir,
                list_dir=False,
                suffix=self.img_suffix,
                recursive=True,
                backend_args=self.backend_args):
            data_info = dict(img_path=osp.join(img_dir, img))
            if ann_dir is not None:
                # ``img[:-0]`` would be empty, so cut by absolute position to
                # stay correct even for an empty image suffix.
                stem = img[:len(img) - suffix_len]
                depth_map = stem + self.seg_map_suffix
                data_info['depth_map_path'] = osp.join(ann_dir, depth_map)
            data_info['seg_fields'] = []
            data_info['category_id'] = self._get_category_id_from_filename(img)
            data_list.append(data_info)
        data_list.sort(key=lambda info: info['img_path'])
        return data_list
@TRANSFORMS.register_module()
class LoadDepthAnnotation(BaseTransform):
    """Load ``depth_map`` annotation provided by depth estimation dataset.

    The annotation format is as the following:

    .. code-block:: python

        {
            'gt_depth_map': np.ndarray [Y, X]
        }

    Required Keys:

    - depth_map_path

    Added Keys:

    - gt_depth_map (np.ndarray): Depth map with shape (Y, X) by
        default, and data type is float32 if set to_float32 = True.

    Modified Keys:

    - seg_fields (created as an empty list when missing)

    Args:
        decode_backend (str): The data decoding backend type, passed to
            ``datafrombytes``. Defaults to 'cv2'.
        to_float32 (bool): Whether to convert the loaded depth map to a float32
            numpy array. If set to False, the decoded array keeps its dtype
            unless a rescale factor other than 1.0 promotes it.
            Defaults to True.
        depth_rescale_factor (float): Factor to rescale the depth value to
            limit the range. Defaults to 1.0.
        backend_args (dict, Optional): Arguments to instantiate a file backend.
            See :class:`mmengine.fileio` for details.
            Defaults to None.
            Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
    """

    def __init__(self,
                 decode_backend: str = 'cv2',
                 to_float32: bool = True,
                 depth_rescale_factor: float = 1.0,
                 backend_args: Optional[dict] = None) -> None:
        super().__init__()
        self.decode_backend = decode_backend
        self.to_float32 = to_float32
        self.depth_rescale_factor = depth_rescale_factor
        # Copy so that later changes to the caller's dict do not leak in.
        self.backend_args = backend_args.copy() if backend_args else None

    def transform(self, results: Dict) -> Dict:
        """Functions to load depth map.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict contains loaded depth map.
        """
        data_bytes = fileio.get(results['depth_map_path'], self.backend_args)
        gt_depth_map = datafrombytes(data_bytes, backend=self.decode_backend)

        if self.to_float32:
            gt_depth_map = gt_depth_map.astype(np.float32)

        scale = self.depth_rescale_factor
        if scale != 1.0:
            if np.issubdtype(gt_depth_map.dtype, np.floating):
                # In place keeps the float dtype chosen above.
                gt_depth_map *= scale
            else:
                # An in-place multiply of an integer map by a float raises a
                # casting error, so let numpy promote into a new array.
                gt_depth_map = gt_depth_map * scale

        results['gt_depth_map'] = gt_depth_map
        results.setdefault('seg_fields', []).append('gt_depth_map')
        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f"decode_backend='{self.decode_backend}', "
                    f'to_float32={self.to_float32}, '
                    f'depth_rescale_factor={self.depth_rescale_factor}, '
                    f'backend_args={self.backend_args})')
        return repr_str
def test_nyu_dataset():
    """NYUDataset indexes the pseudo sample and parses its scene class."""
    prefix = dict(img_path='images', depth_map_path='annotations')
    dataset = NYUDataset(
        data_root='tests/data/pseudo_nyu_dataset', data_prefix=prefix)

    # The fixture directory holds exactly one image / depth map pair.
    assert len(dataset) == 1

    sample = dataset[0]
    assert sample.get('depth_map_path', None) is not None
    # The fixture file is named 'bookstore_...'; the test expects class id 26.
    assert sample.get('category_id', -1) == 26
def parse_args():
    """Parse the command line of the NYU converter.

    Returns:
        argparse.Namespace: ``raw_data`` (zip file or directory holding the
        raw dataset) and ``out_dir`` (defaults to ``./data/nyu``).
    """
    parser = argparse.ArgumentParser(
        description='Convert NYU Depth dataset to mmsegmentation format')
    parser.add_argument('raw_data', help='the path of raw data')
    parser.add_argument(
        '-o', '--out_dir', help='output path', default='./data/nyu')
    args = parser.parse_args()
    return args


def reorganize(raw_data_dir: str, out_dir: str):
    """Reorganize NYU Depth dataset files into the required directory
    structure.

    For each phase the list file ``nyu_<phase>.txt`` inside ``raw_data_dir``
    is read; the first two whitespace separated fields of a line are the
    image and the depth map path relative to ``raw_data_dir``. Files are
    moved (not copied) into ``images/<phase>`` and ``annotations/<phase>``
    below ``out_dir``; these target directories must already exist.

    Args:
        raw_data_dir (str): Path to the raw data directory.
        out_dir (str): Output directory for the organized dataset.
    """

    def move_data(data_list, dst_prefix, fname_func):
        """Move data files from source to destination directory.

        Args:
            data_list (list): List of data file paths.
            dst_prefix (str): Prefix to be added to destination paths.
            fname_func (callable): Function to process file names
        """
        for data_item in data_list:
            # List entries may start with '/', which would make
            # ``osp.join`` discard ``raw_data_dir``.
            data_item = data_item.strip().strip('/')
            new_item = fname_func(data_item)
            shutil.move(
                osp.join(raw_data_dir, data_item),
                osp.join(out_dir, dst_prefix, new_item))

    def process_phase(phase):
        """Process a dataset phase (e.g., 'train' or 'test')."""
        list_file = osp.join(raw_data_dir, f'nyu_{phase}.txt')
        with open(list_file, encoding='utf-8') as f:
            # Blank lines and lines lacking the annotation column are
            # dropped; they would otherwise break the pair unpacking below.
            pairs = [
                fields[:2] for fields in (line.split() for line in f)
                if len(fields) >= 2
            ]
        images = [pair[0] for pair in pairs]
        annos = [pair[1] for pair in pairs]

        # 'scene/rgb_00045.jpg' -> 'scene_00045.jpg'
        move_data(images, f'images/{phase}',
                  lambda x: x.replace('/rgb', ''))
        # 'scene/sync_depth_00045.png' -> 'scene_00045.png'
        move_data(annos, f'annotations/{phase}',
                  lambda x: x.replace('/sync_depth', ''))

    process_phase('train')
    process_phase('test')


def main():
    """Create the output layout and convert a zip file or a directory.

    Raises:
        ValueError: If ``raw_data`` is neither a ``.zip`` file nor an
            existing directory.
    """
    args = parse_args()

    print('Making directories...')
    mkdir_or_exist(args.out_dir)
    for subdir in [
            'images/train', 'images/test', 'annotations/train',
            'annotations/test'
    ]:
        mkdir_or_exist(osp.join(args.out_dir, subdir))

    print('Generating images and annotations...')

    if args.raw_data.endswith('.zip'):
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Close the archive as soon as it is extracted.
            with zipfile.ZipFile(args.raw_data) as zip_file:
                zip_file.extractall(tmp_dir)
            # The archive is expected to contain a top-level 'nyu' folder.
            reorganize(osp.join(tmp_dir, 'nyu'), args.out_dir)
    elif osp.isdir(args.raw_data):
        reorganize(args.raw_data, args.out_dir)
    else:
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        raise ValueError(
            'the argument raw_data should be either a zip file or a '
            'directory.')

    print('Done!')


if __name__ == '__main__':
    main()