DINOv/datasets/dataset_mappers/pascalcontext_dataset_mapper.py
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py
import copy
import scipy.io
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
from detectron2.config import configurable
from detectron2.data import detection_utils as utils
from detectron2.structures import BitMasks, Boxes, Instances
from detectron2.data import transforms as T
__all__ = ["PascalContextSegDatasetMapper"]
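# Minimal usage sketch (illustrative only; assumes a config dict exposing the INPUT keys
# read in from_config below, and a dataset dict with "file_name" / "sem_seg_file_name"):
#
#   mapper = PascalContextSegDatasetMapper(cfg, is_train=False)
#   out = mapper(dataset_dict)
#   image, instances = out["image"], out.get("instances")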
# Adapted from the COCO dataset mappers for Pascal Context semantic segmentation.
class PascalContextSegDatasetMapper_ori:
"""
    A callable which takes a dataset dict in Detectron2 Dataset format
    and maps it into the format used by MaskFormer.
    This dataset mapper applies the same transformations as DETR for COCO panoptic segmentation.
    The callable currently does the following:
    1. Reads the image from "file_name"
    2. Applies geometric transforms to the image and annotation
    3. Finds and applies suitable cropping to the image and annotation
    4. Prepares the image and annotation as Tensors
"""
@configurable
def __init__(
self,
is_train=True,
min_size_test=None,
max_size_test=None,
mean=None,
std=None,
):
"""
NOTE: this interface is experimental.
        Args:
            is_train: whether the mapper is used for training or inference
            min_size_test: target length of the shorter image side at test time
            max_size_test: maximum length of the longer image side at test time
            mean, std: per-channel pixel mean and std (stored for downstream normalization)
"""
self.is_train = is_train
self.min_size_test = min_size_test
self.max_size_test = max_size_test
self.pixel_mean = torch.tensor(mean)[:, None, None]
self.pixel_std = torch.tensor(std)[:, None, None]
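        # NOTE: pixel_mean / pixel_std are stored here but not applied by this mapper;
        # normalization is expected to happen downstream (e.g., in the model).
        # Test-time transform: resize the shorter image side to min_size_test (bicubic).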
t = []
t.append(transforms.Resize(self.min_size_test, interpolation=Image.BICUBIC))
# HACK for ViT evaluation
# t.append(transforms.Resize([self.min_size_test, self.min_size_test], interpolation=Image.BICUBIC))
self.transform = transforms.Compose(t)
@classmethod
def from_config(cls, cfg, is_train=True):
ret = {
"is_train": is_train,
"min_size_test": cfg['INPUT']['MIN_SIZE_TEST'],
"max_size_test": cfg['INPUT']['MAX_SIZE_TEST'],
"mean": cfg['INPUT']['PIXEL_MEAN'],
"std": cfg['INPUT']['PIXEL_STD'],
}
return ret
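    # Pascal Context ground truth may be stored as a .png label map or as a .mat file
    # with a "LabelMap" field; read_semseg handles both.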
    def read_semseg(self, file_name):
        if file_name.endswith('.png'):
            semseg = np.asarray(Image.open(file_name))
        elif file_name.endswith('.mat'):
            semseg = scipy.io.loadmat(file_name)['LabelMap']
        else:
            raise ValueError(f"Unsupported semantic segmentation file: {file_name}")
        return semseg
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
file_name = dataset_dict['file_name']
semseg_name = dataset_dict['sem_seg_file_name']
image = Image.open(file_name).convert('RGB')
dataset_dict['width'] = image.size[0]
dataset_dict['height'] = image.size[1]
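        # At inference time, resize with the test transform; training images are used as loaded.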
        if not self.is_train:
image = self.transform(image)
image = torch.from_numpy(np.asarray(image).copy())
image = image.permute(2, 0, 1)
semseg = self.read_semseg(semseg_name)
semseg = torch.from_numpy(semseg.astype(np.int32))
dataset_dict['image'] = image
dataset_dict['semseg'] = semseg
return dataset_dict
class PascalContextSegDatasetMapper:
"""
    A callable which takes a dataset dict in Detectron2 Dataset format
    and maps it into the format used by MaskFormer.
    This dataset mapper applies the same transformations as DETR for COCO panoptic segmentation.
    The callable currently does the following:
    1. Reads the image from "file_name"
    2. Applies geometric transforms to the image and annotation
    3. Finds and applies suitable cropping to the image and annotation
    4. Prepares the image and annotation as Tensors
"""
@configurable
def __init__(
self,
is_train=True,
augmentations=None,
min_size_test=None,
max_size_test=None,
mean=None,
std=None,
):
"""
NOTE: this interface is experimental.
        Args:
            is_train: whether the mapper is used for training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            min_size_test: target length of the shorter image side at test time
            max_size_test: maximum length of the longer image side at test time
            mean, std: per-channel pixel mean and std (stored for downstream normalization)
"""
self.is_train = is_train
self.min_size_test = min_size_test
self.max_size_test = max_size_test
self.pixel_mean = torch.tensor(mean)[:, None, None]
self.pixel_std = torch.tensor(std)[:, None, None]
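        # As above, the pixel statistics are kept for reference only; this mapper does not
        # normalize the image tensor itself.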
t = []
t.append(transforms.Resize(self.min_size_test, interpolation=Image.BICUBIC))
# HACK for ViT evaluation
# t.append(transforms.Resize([self.min_size_test, self.min_size_test], interpolation=Image.BICUBIC))
self.transform = transforms.Compose(t)
self.augmentations = T.AugmentationList(augmentations)
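        # Label 0 in the Pascal Context GT maps is the background/ignored region.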
self.ignore_label = 0
@classmethod
def from_config(cls, cfg, is_train=True):
augs = utils.build_augmentation(cfg, is_train)
ret = {
"is_train": is_train,
"augmentations": augs,
"min_size_test": cfg['INPUT']['MIN_SIZE_TEST'],
"max_size_test": cfg['INPUT']['MAX_SIZE_TEST'],
"mean": cfg['INPUT']['PIXEL_MEAN'],
"std": cfg['INPUT']['PIXEL_STD'],
}
return ret
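    # Same reader as in the mapper above: .png label maps or .mat files with a "LabelMap" entry.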
    def read_semseg(self, file_name):
        if file_name.endswith('.png'):
            semseg = np.asarray(Image.open(file_name))
        elif file_name.endswith('.mat'):
            semseg = scipy.io.loadmat(file_name)['LabelMap']
        else:
            raise ValueError(f"Unsupported semantic segmentation file: {file_name}")
        return semseg
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
# USER: Write your own image loading if it's not from a file
image = utils.read_image(dataset_dict["file_name"], format='RGB')
utils.check_image_size(dataset_dict, image)
# USER: Remove if you don't do semantic/panoptic segmentation.
semseg_name = None
if "sem_seg_file_name" in dataset_dict:
semseg_name = dataset_dict.pop("sem_seg_file_name")
            if semseg_name.split('.')[-1] != 'mat':
sem_seg_gt = utils.read_image(semseg_name, "L").squeeze(2)
else:
sem_seg_gt = self.read_semseg(semseg_name)
                sem_seg_gt = sem_seg_gt.astype(np.int32)  # int32: PC-459 label ids exceed the uint8 range
else:
sem_seg_gt = None
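        # Apply the configured augmentations jointly to the image and the semantic GT
        # so that the two stay spatially aligned.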
aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        aug_transforms = self.augmentations(aug_input)  # the TransformList that was applied (not reused below)
image, sem_seg_gt = aug_input.image, aug_input.sem_seg
image_shape = image.shape[:2] # h, w
# Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
# but not efficient on large generic data structures due to the use of pickle & mp.Queue.
# Therefore it's important to use torch.Tensor.
dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
if sem_seg_gt is not None:
sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
# if self.size_divisibility > 0:
# image_size = (image.shape[-2], image.shape[-1])
# padding_size = [
# 0,
# self.size_divisibility - image_size[1],
# 0,
# self.size_divisibility - image_size[0],
# ]
# image = F.pad(image, padding_size, value=128).contiguous()
# if sem_seg_gt is not None:
# sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous()
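        # This mapper handles pure semantic segmentation; instance-style "annotations" are not supported.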
if "annotations" in dataset_dict:
raise ValueError("Semantic segmentation dataset should not have 'annotations'.")
# Prepare per-category binary masks
if sem_seg_gt is not None:
            sem_seg_gt = sem_seg_gt.numpy()
            instances = Instances(image_shape)
            classes = np.unique(sem_seg_gt)
            # Remove the ignored region (label 0) and shift labels to 0-indexed class ids.
            # TODO: this is a hack for datasets where background is stored as 0, which is meaningless.
            classes = classes[classes != self.ignore_label] - 1
            instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
            masks = []
            for class_id in classes:
                # The GT map is still 1-indexed, so shift back when building each binary mask.
                masks.append(sem_seg_gt == class_id + 1)
if len(masks) == 0:
# Some image does not have annotation (all ignored)
instances.gt_masks = torch.zeros((0, sem_seg_gt.shape[-2], sem_seg_gt.shape[-1]))
else:
masks = BitMasks(
torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
)
instances.gt_masks = masks.tensor
instances.gt_boxes = masks.get_bounding_boxes()
dataset_dict["instances"] = instances
return dataset_dict