diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/README.md b/projects/medical/2d_image/microscopy_images/2pm_vessel/README.md new file mode 100644 index 000000000..724fb9742 --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/README.md @@ -0,0 +1,153 @@ +# 2-PM Vessel Dataset + +## Description + +This project supports the **`2-PM Vessel Dataset`**, which can be downloaded from [here](https://opendatalab.org.cn/2-PM_Vessel_Dataset). + +### Dataset Overview + +An open-source volumetric brain vasculature dataset obtained with two-photon microscopy at the Focused Ultrasound Lab, Sunnybrook Research Institute (affiliated with the University of Toronto), by Dr. Alison Burgess, Charissa Poon and Marc Santos. + +The dataset contains a total of 12 volumetric stacks consisting of images of mouse brain vasculature and tumor vasculature. + +### Information Statistics + +| Dataset Name | Anatomical Region | Task Type | Modality | Num. Classes | Train/Val/Test Images | Train/Val/Test Labeled | Release Date | License | +| ------------------------------------------------------------ | ----------------- | ------------ | ----------------- | ------------ | --------------------- | ---------------------- | ------------ | ------------------------------------------------------------- | +| [2pm_vessel](https://opendatalab.org.cn/2-PM_Vessel_Dataset) | vessel | segmentation | microscopy_images | 2 | 216/-/- | yes/-/- | 2021 | [CC0 1.0](https://creativecommons.org/publicdomain/zero/1.0/) | + +| Class Name | Num. Train | Pct. Train | Num. Val | Pct. Val | Num. Test | Pct. Test | +| :--------: | :--------: | :--------: | :------: | :------: | :-------: | :-------: | +| background | 216 | 85.78 | - | - | - | - | +| vessel | 180 | 14.22 | - | - | - | - | + +Note: + +- `Pct` means the percentage of pixels of this category among all pixels. + +### Visualization + +![2pmv](https://raw.githubusercontent.com/uni-medical/medical-datasets-visualization/main/2d/semantic_seg/histopathology/2pm_vessel/2pm_vessel_dataset.png?raw=true) + +### Dataset Citation + +``` +@article{teikari2016deep, + title={Deep learning convolutional networks for multiphoton microscopy vasculature segmentation}, + author={Teikari, Petteri and Santos, Marc and Poon, Charissa and Hynynen, Kullervo}, + journal={arXiv preprint arXiv:1606.02382}, + year={2016} +} +``` + +### Prerequisites + +- Python v3.8 +- PyTorch v1.10.0 +- pillow(PIL) v9.3.0 +- scikit-learn(sklearn) v1.2.0 +- [MIM](https://github.com/open-mmlab/mim) v0.3.4 +- [MMCV](https://github.com/open-mmlab/mmcv) v2.0.0rc4 +- [MMEngine](https://github.com/open-mmlab/mmengine) v0.2.0 or higher +- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) v1.0.0rc5 + +All the commands below rely on the correct configuration of `PYTHONPATH`, which should point to the project's directory so that Python can locate the module files. In the `2pm_vessel/` root directory, run the following command to add the current directory to `PYTHONPATH`: + +```shell +export PYTHONPATH=`pwd`:$PYTHONPATH +``` + +### Dataset Preparing + +- download the dataset from [here](https://opendatalab.org.cn/2-PM_Vessel_Dataset) and decompress the data to the path `'data/'`. +- run the script `"python tools/prepare_dataset.py"` to format the data and change the folder structure as below. +- run the script `"python ../../tools/split_seg_dataset.py"` to split the dataset and generate `train.txt`, `val.txt` and `test.txt`.
Since the labels of the official validation and test sets are not available, we randomly generate `train.txt` and `val.txt` from the training set. + +```shell +mkdir data && cd data +pip install opendatalab +odl get 2-PM_Vessel_Dataset +cd .. +python tools/prepare_dataset.py +python ../../tools/split_seg_dataset.py +``` + +```none + mmsegmentation + ├── mmseg + ├── projects + │ ├── medical + │ │ ├── 2d_image + │ │ │ ├── microscopy_images + │ │ │ │ ├── 2pm_vessel + │ │ │ │ │ ├── configs + │ │ │ │ │ ├── datasets + │ │ │ │ │ ├── tools + │ │ │ │ │ ├── data + │ │ │ │ │ │ ├── train.txt + │ │ │ │ │ │ ├── val.txt + │ │ │ │ │ │ ├── images + │ │ │ │ │ │ │ ├── train + │ │ │ │ │ │ │ │ ├── xxx.png + │ │ │ │ │ │ │ │ ├── ... + │ │ │ │ │ │ │ │ └── xxx.png + │ │ │ │ │ │ ├── masks + │ │ │ │ │ │ │ ├── train + │ │ │ │ │ │ │ │ ├── xxx.png + │ │ │ │ │ │ │ │ ├── ... + │ │ │ │ │ │ │ │ └── xxx.png + +``` + +### Divided Dataset Information + +***Note: The table below is based on the train/val split that we generated ourselves.*** + +| Class Name | Num. Train | Pct. Train | Num. Val | Pct. Val | Num. Test | Pct. Test | +| :--------: | :--------: | :--------: | :------: | :------: | :-------: | :-------: | +| background | 172 | 85.88 | 44 | 85.4 | - | - | +| vessel | 142 | 14.12 | 38 | 14.6 | - | - | + +### Training commands + +To train models on a single server with one GPU (default): + +```shell +mim train mmseg ./configs/${CONFIG_FILE} +``` + +### Testing commands + +To test models on a single server with one GPU (default): + +```shell +mim test mmseg ./configs/${CONFIG_FILE} --checkpoint ${CHECKPOINT_PATH} +``` + + + +## Checklist + +- [x] Milestone 1: PR-ready, and acceptable to be one of the `projects/`. + + - [x] Finish the code + - [x] Basic docstrings & proper citation + - [ ] Test-time correctness + - [x] A full README + +- [ ] Milestone 2: Indicates a successful model implementation. + + - [ ] Training-time correctness + +- [ ] Milestone 3: Good to be a part of our core package! + + - [ ] Type hints and docstrings + - [ ] Unit tests + - [ ] Code polishing + - [ ] Metafile.yml + +- [ ] Move your modules into the core package following the codebase's file hierarchy structure. + +- [ ] Refactor your modules into the core package following the codebase's file hierarchy structure.
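The split step in the README above relies on `../../tools/split_seg_dataset.py`, which is not included in this diff. As a rough, non-authoritative sketch of the kind of random split it is expected to perform — the 80/20 ratio, the fixed seed, and the exact line format written to `train.txt`/`val.txt` are assumptions here, not taken from the real script:

```python
import os
import random

# Hypothetical illustration only; the real ../../tools/split_seg_dataset.py
# may use a different ratio, seed handling, and file-name format.
img_dir = 'data/images/train/'
names = sorted(n for n in os.listdir(img_dir) if n.endswith('.png'))

random.seed(0)                      # keep the split reproducible
random.shuffle(names)
num_train = int(len(names) * 0.8)   # assumed 80/20 train/val ratio

with open('data/train.txt', 'w') as f:
    f.write('\n'.join(names[:num_train]) + '\n')
with open('data/val.txt', 'w') as f:
    f.write('\n'.join(names[num_train:]) + '\n')
```

The divided statistics in the README (172 training images vs. 44 validation images out of 216) are consistent with roughly such an 80/20 split, but the authoritative behaviour is whatever the shared script implements.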
diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/2pm-vessel_512x512.py b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/2pm-vessel_512x512.py new file mode 100644 index 000000000..124403fa9 --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/2pm-vessel_512x512.py @@ -0,0 +1,42 @@ +dataset_type = 'TwoPMVesselDataset' +data_root = 'data/' +img_scale = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', scale=img_scale, keep_ratio=False), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=False), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict( + batch_size=16, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='train.txt', + data_prefix=dict(img_path='images/', seg_map_path='masks/'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='val.txt', + data_prefix=dict(img_path='images/', seg_map_path='masks/'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU', 'mDice']) +test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU', 'mDice']) diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.0001-20k_2pm-vessel-512x512.py b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.0001-20k_2pm-vessel-512x512.py new file mode 100644 index 000000000..2a429e906 --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.0001-20k_2pm-vessel-512x512.py @@ -0,0 +1,17 @@ +_base_ = [ + 'mmseg::_base_/models/fcn_unet_s5-d16.py', './2pm-vessel_512x512.py', + 'mmseg::_base_/default_runtime.py', + 'mmseg::_base_/schedules/schedule_20k.py' +] +custom_imports = dict(imports='datasets.2pm-vessel_dataset') +img_scale = (512, 512) +data_preprocessor = dict(size=img_scale) +optimizer = dict(lr=0.0001) +optim_wrapper = dict(optimizer=optimizer) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=2), + auxiliary_head=None, + test_cfg=dict(mode='whole', _delete_=True)) +vis_backends = None +visualizer = dict(vis_backends=vis_backends) diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.001-20k_2pm-vessel-512x512.py b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.001-20k_2pm-vessel-512x512.py new file mode 100644 index 000000000..10d9bb82f --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.001-20k_2pm-vessel-512x512.py @@ -0,0 +1,17 @@ +_base_ = [ + 'mmseg::_base_/models/fcn_unet_s5-d16.py', './2pm-vessel_512x512.py', + 'mmseg::_base_/default_runtime.py', + 'mmseg::_base_/schedules/schedule_20k.py' +] +custom_imports = dict(imports='datasets.2pm-vessel_dataset') +img_scale = (512, 512) +data_preprocessor = dict(size=img_scale) +optimizer = dict(lr=0.001) +optim_wrapper = 
dict(optimizer=optimizer) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=2), + auxiliary_head=None, + test_cfg=dict(mode='whole', _delete_=True)) +vis_backends = None +visualizer = dict(vis_backends=vis_backends) diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.01-20k_2pm-vessel-512x512.py b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.01-20k_2pm-vessel-512x512.py new file mode 100644 index 000000000..65c1579ec --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.01-20k_2pm-vessel-512x512.py @@ -0,0 +1,17 @@ +_base_ = [ + 'mmseg::_base_/models/fcn_unet_s5-d16.py', './2pm-vessel_512x512.py', + 'mmseg::_base_/default_runtime.py', + 'mmseg::_base_/schedules/schedule_20k.py' +] +custom_imports = dict(imports='datasets.2pm-vessel_dataset') +img_scale = (512, 512) +data_preprocessor = dict(size=img_scale) +optimizer = dict(lr=0.01) +optim_wrapper = dict(optimizer=optimizer) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=2), + auxiliary_head=None, + test_cfg=dict(mode='whole', _delete_=True)) +vis_backends = None +visualizer = dict(vis_backends=vis_backends) diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.01lr-sigmoid-20k_bactteria-detection-512x512.py b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.01lr-sigmoid-20k_bactteria-detection-512x512.py new file mode 100644 index 000000000..91ed6ada3 --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/configs/fcn-unet-s5-d16_unet_1xb16-0.01lr-sigmoid-20k_bactteria-detection-512x512.py @@ -0,0 +1,18 @@ +_base_ = [ + 'mmseg::_base_/models/fcn_unet_s5-d16.py', './2pm-vessel_512x512.py', + 'mmseg::_base_/default_runtime.py', + 'mmseg::_base_/schedules/schedule_20k.py' +] +custom_imports = dict(imports='datasets.2pm-vessel_dataset') +img_scale = (512, 512) +data_preprocessor = dict(size=img_scale) +optimizer = dict(lr=0.01) +optim_wrapper = dict(optimizer=optimizer) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict( + num_classes=2, loss_decode=dict(use_sigmoid=True), out_channels=1), + auxiliary_head=None, + test_cfg=dict(mode='whole', _delete_=True)) +vis_backends = None +visualizer = dict(vis_backends=vis_backends) diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/datasets/2pm-vessel_dataset.py b/projects/medical/2d_image/microscopy_images/2pm_vessel/datasets/2pm-vessel_dataset.py new file mode 100644 index 000000000..984b5a136 --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/datasets/2pm-vessel_dataset.py @@ -0,0 +1,31 @@ +from mmseg.datasets import BaseSegDataset +from mmseg.registry import DATASETS + + +@DATASETS.register_module() +class TwoPMVesselDataset(BaseSegDataset): + """TwoPMVesselDataset dataset. + + In segmentation map annotation for TwoPMVesselDataset, + 0 stands for background, which is included in 2 categories. + ``reduce_zero_label`` is fixed to False. The ``img_suffix`` + is fixed to '.png' and ``seg_map_suffix`` is fixed to '.png'. + + Args: + img_suffix (str): Suffix of images. Default: '.png' + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + reduce_zero_label (bool): Whether to mark label zero as ignored. + Default to False. 
+ """ + METAINFO = dict(classes=('background', 'vessel')) + + def __init__(self, + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=False, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) diff --git a/projects/medical/2d_image/microscopy_images/2pm_vessel/tools/prepare_dataset.py b/projects/medical/2d_image/microscopy_images/2pm_vessel/tools/prepare_dataset.py new file mode 100644 index 000000000..1b46af2ca --- /dev/null +++ b/projects/medical/2d_image/microscopy_images/2pm_vessel/tools/prepare_dataset.py @@ -0,0 +1,46 @@ +import os + +import tifffile as tiff +from PIL import Image + +root_path = 'data/' + +image_dir = os.path.join(root_path, + '2-PM_Vessel_Dataset/raw/vesselNN_dataset/denoised') +label_dir = os.path.join(root_path, + '2-PM_Vessel_Dataset/raw/vesselNN_dataset/labels') +tgt_img_train_dir = os.path.join(root_path, 'images/train/') +tgt_mask_train_dir = os.path.join(root_path, 'masks/train/') +os.system('mkdir -p ' + tgt_img_train_dir) +os.system('mkdir -p ' + tgt_mask_train_dir) + + +def filter_suffix(src_dir, suffix): + suffix = '.' + suffix if '.' not in suffix else suffix + file_names = [_ for _ in os.listdir(src_dir) if _.endswith(suffix)] + file_paths = [os.path.join(src_dir, _) for _ in file_names] + return sorted(file_paths), sorted(file_names) + + +if __name__ == '__main__': + + image_path_list, _ = filter_suffix(image_dir, suffix='tif') + label_path_list, _ = filter_suffix(label_dir, suffix='.tif') + + for img_path, label_path in zip(image_path_list, label_path_list): + labels = tiff.imread(label_path) + images = tiff.imread(img_path) + assert labels.ndim == 3 + assert images.shape == labels.shape + name = img_path.split('/')[-1].replace('.tif', '') + # a single .tif file contains multiple slices + # as long as it is read by tifffile package. + for i in range(labels.shape[0]): + slice_name = name + '_' + str(i).rjust(3, '0') + '.png' + image = images[i] + label = labels[i] // 255 + + save_path_label = os.path.join(tgt_mask_train_dir, slice_name) + Image.fromarray(label).save(save_path_label) + save_path_image = os.path.join(tgt_img_train_dir, slice_name) + Image.fromarray(image).convert('RGB').save(save_path_image)
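After running `tools/prepare_dataset.py` above, a quick optional sanity check (not part of this PR) can confirm that every exported image slice has a matching mask and that the masks contain only the 0/1 values produced by the `labels[i] // 255` step; the paths below follow the layout the script creates:

```python
import os

import numpy as np
from PIL import Image

img_dir = 'data/images/train/'
mask_dir = 'data/masks/train/'

# prepare_dataset.py writes images and masks under identical slice names
img_names = sorted(os.listdir(img_dir))
mask_names = sorted(os.listdir(mask_dir))
assert img_names == mask_names, 'image/mask file names do not match'

for name in mask_names:
    mask = np.array(Image.open(os.path.join(mask_dir, name)))
    # labels were divided by 255, so only the values 0 and 1 should remain
    assert set(np.unique(mask)) <= {0, 1}, f'unexpected label values in {name}'

print(f'checked {len(mask_names)} image/mask pairs')
```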