# object_localization_network/tests/test_data/test_dataset.py

import bisect
import logging
import math
import os.path as osp
import tempfile
from collections import defaultdict
from unittest.mock import MagicMock, patch
import mmcv
import numpy as np
import pytest
import torch
import torch.nn as nn
from mmcv.runner import EpochBasedRunner
from torch.utils.data import DataLoader
from mmdet.core.evaluation import DistEvalHook, EvalHook
from mmdet.datasets import (DATASETS, ClassBalancedDataset, CocoDataset,
ConcatDataset, CustomDataset, RepeatDataset,
build_dataset)
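

# Helper: writes a minimal COCO-format annotation file containing a single
# 640x640 image with four 'car' ground-truth boxes of increasing area.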
def _create_dummy_coco_json(json_name):
image = {
'id': 0,
'width': 640,
'height': 640,
'file_name': 'fake_name.jpg',
}
annotation_1 = {
'id': 1,
'image_id': 0,
'category_id': 0,
'area': 400,
'bbox': [50, 60, 20, 20],
'iscrowd': 0,
}
annotation_2 = {
'id': 2,
'image_id': 0,
'category_id': 0,
'area': 900,
'bbox': [100, 120, 30, 30],
'iscrowd': 0,
}
annotation_3 = {
'id': 3,
'image_id': 0,
'category_id': 0,
'area': 1600,
'bbox': [150, 160, 40, 40],
'iscrowd': 0,
}
annotation_4 = {
'id': 4,
'image_id': 0,
'category_id': 0,
'area': 10000,
'bbox': [250, 260, 100, 100],
'iscrowd': 0,
}
categories = [{
'id': 0,
'name': 'car',
'supercategory': 'car',
}]
fake_json = {
'images': [image],
'annotations':
[annotation_1, annotation_2, annotation_3, annotation_4],
'categories': categories
}
mmcv.dump(fake_json, json_name)
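

# Helper: writes the same four ground-truth boxes in the annotation format
# expected by CustomDataset (a list of dicts with 'filename', 'width',
# 'height' and an 'ann' dict) and dumps it as a pickle file.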
def _create_dummy_custom_pkl(pkl_name):
fake_pkl = [{
'filename': 'fake_name.jpg',
'width': 640,
'height': 640,
'ann': {
'bboxes':
np.array([[50, 60, 70, 80], [100, 120, 130, 150],
[150, 160, 190, 200], [250, 260, 350, 360]]),
'labels':
np.array([0, 0, 0, 0])
}
}]
mmcv.dump(fake_pkl, pkl_name)
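

# Helper: fake detection results for one image and one class. The boxes match
# the ground truth exactly (with high scores), so every mAP metric asserted in
# the tests below should evaluate to 1.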
def _create_dummy_results():
boxes = [
np.array([[50, 60, 70, 80, 1.0], [100, 120, 130, 150, 0.98],
[150, 160, 190, 200, 0.96], [250, 260, 350, 360, 0.95]])
]
return [boxes]
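

# End-to-end check of dataset evaluation: a single CocoDataset / CustomDataset,
# plus ConcatDataset built from a list of config dicts, from a list of
# ann_files, and from an explicit ConcatDataset config with separate_eval=False.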
def test_dataset_evaluation():
tmp_dir = tempfile.TemporaryDirectory()
# create dummy data
fake_json_file = osp.join(tmp_dir.name, 'fake_data.json')
_create_dummy_coco_json(fake_json_file)
# test single coco dataset evaluation
coco_dataset = CocoDataset(
ann_file=fake_json_file, classes=('car', ), pipeline=[])
fake_results = _create_dummy_results()
eval_results = coco_dataset.evaluate(fake_results, classwise=True)
assert eval_results['bbox_mAP'] == 1
assert eval_results['bbox_mAP_50'] == 1
assert eval_results['bbox_mAP_75'] == 1
# test concat dataset evaluation
fake_concat_results = _create_dummy_results() + _create_dummy_results()
# build concat dataset through two config dicts
coco_cfg = dict(
type='CocoDataset',
ann_file=fake_json_file,
classes=('car', ),
pipeline=[])
concat_cfgs = [coco_cfg, coco_cfg]
concat_dataset = build_dataset(concat_cfgs)
eval_results = concat_dataset.evaluate(fake_concat_results)
assert eval_results['0_bbox_mAP'] == 1
assert eval_results['0_bbox_mAP_50'] == 1
assert eval_results['0_bbox_mAP_75'] == 1
assert eval_results['1_bbox_mAP'] == 1
assert eval_results['1_bbox_mAP_50'] == 1
assert eval_results['1_bbox_mAP_75'] == 1
# build concat dataset through concatenated ann_file
coco_cfg = dict(
type='CocoDataset',
ann_file=[fake_json_file, fake_json_file],
classes=('car', ),
pipeline=[])
concat_dataset = build_dataset(coco_cfg)
eval_results = concat_dataset.evaluate(fake_concat_results)
assert eval_results['0_bbox_mAP'] == 1
assert eval_results['0_bbox_mAP_50'] == 1
assert eval_results['0_bbox_mAP_75'] == 1
assert eval_results['1_bbox_mAP'] == 1
assert eval_results['1_bbox_mAP_50'] == 1
assert eval_results['1_bbox_mAP_75'] == 1
# create dummy data
fake_pkl_file = osp.join(tmp_dir.name, 'fake_data.pkl')
_create_dummy_custom_pkl(fake_pkl_file)
# test single custom dataset evaluation
custom_dataset = CustomDataset(
ann_file=fake_pkl_file, classes=('car', ), pipeline=[])
fake_results = _create_dummy_results()
eval_results = custom_dataset.evaluate(fake_results)
assert eval_results['mAP'] == 1
# test concat dataset evaluation
fake_concat_results = _create_dummy_results() + _create_dummy_results()
# build concat dataset through two config dicts
custom_cfg = dict(
type='CustomDataset',
ann_file=fake_pkl_file,
classes=('car', ),
pipeline=[])
concat_cfgs = [custom_cfg, custom_cfg]
concat_dataset = build_dataset(concat_cfgs)
eval_results = concat_dataset.evaluate(fake_concat_results)
assert eval_results['0_mAP'] == 1
assert eval_results['1_mAP'] == 1
# build concat dataset through concatenated ann_file
concat_cfg = dict(
type='CustomDataset',
ann_file=[fake_pkl_file, fake_pkl_file],
classes=('car', ),
pipeline=[])
concat_dataset = build_dataset(concat_cfg)
eval_results = concat_dataset.evaluate(fake_concat_results)
assert eval_results['0_mAP'] == 1
assert eval_results['1_mAP'] == 1
# build concat dataset through an explicit ConcatDataset type
concat_cfg = dict(
type='ConcatDataset',
datasets=[custom_cfg, custom_cfg],
separate_eval=False)
concat_dataset = build_dataset(concat_cfg)
eval_results = concat_dataset.evaluate(fake_concat_results, metric='mAP')
assert eval_results['mAP'] == 1
assert len(concat_dataset.datasets[0].data_infos) == \
len(concat_dataset.datasets[1].data_infos)
assert len(concat_dataset.datasets[0].data_infos) == 1
tmp_dir.cleanup()
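

# Check that a user-supplied `classes` argument (tuple, list, non-subset list,
# None, or a text file with one class name per line) overrides the dataset's
# default CLASSES. load_annotations and _filter_imgs are mocked out, so no
# real annotation files are needed.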
@patch('mmdet.datasets.CocoDataset.load_annotations', MagicMock)
@patch('mmdet.datasets.CustomDataset.load_annotations', MagicMock)
@patch('mmdet.datasets.XMLDataset.load_annotations', MagicMock)
@patch('mmdet.datasets.CityscapesDataset.load_annotations', MagicMock)
@patch('mmdet.datasets.CocoDataset._filter_imgs', MagicMock)
@patch('mmdet.datasets.CustomDataset._filter_imgs', MagicMock)
@patch('mmdet.datasets.XMLDataset._filter_imgs', MagicMock)
@patch('mmdet.datasets.CityscapesDataset._filter_imgs', MagicMock)
@pytest.mark.parametrize('dataset',
['CocoDataset', 'VOCDataset', 'CityscapesDataset'])
def test_custom_classes_override_default(dataset):
dataset_class = DATASETS.get(dataset)
if dataset in ['CocoDataset', 'CityscapesDataset']:
dataset_class.coco = MagicMock()
dataset_class.cat_ids = MagicMock()
original_classes = dataset_class.CLASSES
# Test setting classes as a tuple
custom_dataset = dataset_class(
ann_file=MagicMock(),
pipeline=[],
classes=('bus', 'car'),
test_mode=True,
img_prefix='VOC2007' if dataset == 'VOCDataset' else '')
assert custom_dataset.CLASSES != original_classes
assert custom_dataset.CLASSES == ('bus', 'car')
# Test setting classes as a list
custom_dataset = dataset_class(
ann_file=MagicMock(),
pipeline=[],
classes=['bus', 'car'],
test_mode=True,
img_prefix='VOC2007' if dataset == 'VOCDataset' else '')
assert custom_dataset.CLASSES != original_classes
assert custom_dataset.CLASSES == ['bus', 'car']
# Test overriding with classes that are not a subset of the defaults
custom_dataset = dataset_class(
ann_file=MagicMock(),
pipeline=[],
classes=['foo'],
test_mode=True,
img_prefix='VOC2007' if dataset == 'VOCDataset' else '')
assert custom_dataset.CLASSES != original_classes
assert custom_dataset.CLASSES == ['foo']
# Test default behavior
custom_dataset = dataset_class(
ann_file=MagicMock(),
pipeline=[],
classes=None,
test_mode=True,
img_prefix='VOC2007' if dataset == 'VOCDataset' else '')
assert custom_dataset.CLASSES == original_classes
# Test passing classes as the path of a text file (one class name per line)
tmp_file = tempfile.NamedTemporaryFile()
with open(tmp_file.name, 'w') as f:
f.write('bus\ncar\n')
custom_dataset = dataset_class(
ann_file=MagicMock(),
pipeline=[],
classes=tmp_file.name,
test_mode=True,
img_prefix='VOC2007' if dataset == 'VOCDataset' else '')
tmp_file.close()
assert custom_dataset.CLASSES != original_classes
assert custom_dataset.CLASSES == ['bus', 'car']
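

# Check the dataset wrappers: ConcatDataset index mapping and get_cat_ids,
# RepeatDataset index wrap-around, and ClassBalancedDataset repeat factors,
# using mocked CustomDataset instances whose __getitem__ returns the index.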
def test_dataset_wrapper():
CustomDataset.load_annotations = MagicMock()
CustomDataset.__getitem__ = MagicMock(side_effect=lambda idx: idx)
dataset_a = CustomDataset(
ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')
len_a = 10
cat_ids_list_a = [
np.random.randint(0, 80, num).tolist()
for num in np.random.randint(1, 20, len_a)
]
dataset_a.data_infos = MagicMock()
dataset_a.data_infos.__len__.return_value = len_a
dataset_a.get_cat_ids = MagicMock(
side_effect=lambda idx: cat_ids_list_a[idx])
dataset_b = CustomDataset(
ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')
len_b = 20
cat_ids_list_b = [
np.random.randint(0, 80, num).tolist()
for num in np.random.randint(1, 20, len_b)
]
dataset_b.data_infos = MagicMock()
dataset_b.data_infos.__len__.return_value = len_b
dataset_b.get_cat_ids = MagicMock(
side_effect=lambda idx: cat_ids_list_b[idx])
concat_dataset = ConcatDataset([dataset_a, dataset_b])
assert concat_dataset[5] == 5
assert concat_dataset[25] == 15
assert concat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
assert concat_dataset.get_cat_ids(25) == cat_ids_list_b[15]
assert len(concat_dataset) == len(dataset_a) + len(dataset_b)
repeat_dataset = RepeatDataset(dataset_a, 10)
assert repeat_dataset[5] == 5
assert repeat_dataset[15] == 5
assert repeat_dataset[27] == 7
assert repeat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
assert repeat_dataset.get_cat_ids(15) == cat_ids_list_a[5]
assert repeat_dataset.get_cat_ids(27) == cat_ids_list_a[7]
assert len(repeat_dataset) == 10 * len(dataset_a)
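
# Re-derive the expected ClassBalancedDataset repeat factors by hand:
# category frequency f(c) = fraction of images containing c, threshold t set
# to the mean frequency, per-category factor r(c) = max(1, sqrt(t / f(c))),
# and each image repeated ceil(max r(c) over its categories) times.
# E.g. an image whose rarest category has f(c) = 0.1 with t = 0.4 is
# repeated ceil(sqrt(0.4 / 0.1)) = 2 times.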
category_freq = defaultdict(int)
for cat_ids in cat_ids_list_a:
cat_ids = set(cat_ids)
for cat_id in cat_ids:
category_freq[cat_id] += 1
for k, v in category_freq.items():
category_freq[k] = v / len(cat_ids_list_a)
mean_freq = np.mean(list(category_freq.values()))
repeat_thr = mean_freq
category_repeat = {
cat_id: max(1.0, math.sqrt(repeat_thr / cat_freq))
for cat_id, cat_freq in category_freq.items()
}
repeat_factors = []
for cat_ids in cat_ids_list_a:
cat_ids = set(cat_ids)
repeat_factor = max({category_repeat[cat_id] for cat_id in cat_ids})
repeat_factors.append(math.ceil(repeat_factor))
repeat_factors_cumsum = np.cumsum(repeat_factors)
repeat_factor_dataset = ClassBalancedDataset(dataset_a, repeat_thr)
assert len(repeat_factor_dataset) == repeat_factors_cumsum[-1]
for idx in np.random.randint(0, len(repeat_factor_dataset), 3):
assert repeat_factor_dataset[idx] == bisect.bisect_right(
repeat_factors_cumsum, idx)
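

# Exercise EvalHook / DistEvalHook scheduling: invalid arguments, different
# `start` / `interval` combinations, and resuming from an intermediate epoch.
# Each case asserts the expected number of calls to the mocked `evaluate`.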
@patch('mmdet.apis.single_gpu_test', MagicMock)
@patch('mmdet.apis.multi_gpu_test', MagicMock)
@pytest.mark.parametrize('EvalHookParam', (EvalHook, DistEvalHook))
def test_evaluation_hook(EvalHookParam):
# create dummy data
dataloader = DataLoader(torch.ones((5, 2)))
# 0.1. dataloader is not a DataLoader object
with pytest.raises(TypeError):
EvalHookParam(dataloader=MagicMock(), interval=-1)
# 0.2. negative interval
with pytest.raises(ValueError):
EvalHookParam(dataloader, interval=-1)
# 1. start=None, interval=1: perform evaluation after each epoch.
runner = _build_demo_runner()
evalhook = EvalHookParam(dataloader, interval=1)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner.run([dataloader], [('train', 1)], 2)
assert evalhook.evaluate.call_count == 2 # after epoch 1 & 2
# 2. start=1, interval=1: perform evaluation after each epoch.
runner = _build_demo_runner()
evalhook = EvalHookParam(dataloader, start=1, interval=1)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner.run([dataloader], [('train', 1)], 2)
assert evalhook.evaluate.call_count == 2 # after epoch 1 & 2
# 3. start=None, interval=2: perform evaluation after epoch 2, 4, 6, etc
runner = _build_demo_runner()
evalhook = EvalHookParam(dataloader, interval=2)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner.run([dataloader], [('train', 1)], 2)
assert evalhook.evaluate.call_count == 1 # after epoch 2
# 4. start=1, interval=2: perform evaluation after epoch 1, 3, 5, etc
runner = _build_demo_runner()
evalhook = EvalHookParam(dataloader, start=1, interval=2)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner.run([dataloader], [('train', 1)], 3)
assert evalhook.evaluate.call_count == 2 # after epoch 1 & 3
# 5. start=0/negative, interval=1: perform evaluation after each epoch and
# before epoch 1.
runner = _build_demo_runner()
evalhook = EvalHookParam(dataloader, start=0)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner.run([dataloader], [('train', 1)], 2)
assert evalhook.evaluate.call_count == 3  # before epoch 1 and after epochs 1 & 2
runner = _build_demo_runner()
with pytest.warns(UserWarning):
evalhook = EvalHookParam(dataloader, start=-2)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner.run([dataloader], [('train', 1)], 2)
assert evalhook.evaluate.call_count == 3  # before epoch 1 and after epochs 1 & 2
# 6. resuming from epoch i, start = x (x <= i), interval=1: perform evaluation
# once before the first resumed epoch and after each remaining epoch.
runner = _build_demo_runner()
evalhook = EvalHookParam(dataloader, start=1)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner._epoch = 2
runner.run([dataloader], [('train', 1)], 3)
assert evalhook.evaluate.call_count == 2 # before & after epoch 3
# 7. resuming from epoch i, start = i+1 or None, interval=1: perform
# evaluation after each remaining epoch.
runner = _build_demo_runner()
evalhook = EvalHookParam(dataloader, start=2)
evalhook.evaluate = MagicMock()
runner.register_hook(evalhook)
runner._epoch = 1
runner.run([dataloader], [('train', 1)], 3)
assert evalhook.evaluate.call_count == 2 # after epoch 2 & 3
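

# Minimal EpochBasedRunner wrapped around a one-layer linear model; just
# enough to drive the EvalHook scheduling tests above.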
def _build_demo_runner():
class Model(nn.Module):
def __init__(self):
super().__init__()
self.linear = nn.Linear(2, 1)
def forward(self, x):
return self.linear(x)
def train_step(self, x, optimizer, **kwargs):
return dict(loss=self(x))
def val_step(self, x, optimizer, **kwargs):
return dict(loss=self(x))
model = Model()
tmp_dir = tempfile.mkdtemp()
runner = EpochBasedRunner(
model=model, work_dir=tmp_dir, logger=logging.getLogger())
return runner
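

# With filter_empty_gt=True, images that contain no ground truth for the
# selected classes are dropped; with filter_empty_gt=False all three images
# in the sample annotation file are kept regardless of the class filter.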
@pytest.mark.parametrize('classes, expected_length', [(['bus'], 2),
(['car'], 1),
(['bus', 'car'], 2)])
def test_allow_empty_images(classes, expected_length):
dataset_class = DATASETS.get('CocoDataset')
# Filter empty images
filtered_dataset = dataset_class(
ann_file='tests/data/coco_sample.json',
img_prefix='tests/data',
pipeline=[],
classes=classes,
filter_empty_gt=True)
# Get all
full_dataset = dataset_class(
ann_file='tests/data/coco_sample.json',
img_prefix='tests/data',
pipeline=[],
classes=classes,
filter_empty_gt=False)
assert len(filtered_dataset) == expected_length
assert len(filtered_dataset.img_ids) == expected_length
assert len(full_dataset) == 3
assert len(full_dataset.img_ids) == 3
assert filtered_dataset.CLASSES == classes
assert full_dataset.CLASSES == classes