mirror of https://github.com/open-mmlab/mmocr.git
529 lines
17 KiB
Python
529 lines
17 KiB
Python
# Copyright (c) OpenMMLab. All rights reserved.
|
|
"""pytest tests/test_detector.py."""
|
|
import copy
|
|
import tempfile
|
|
from functools import partial
|
|
from os.path import dirname, exists, join
|
|
|
|
import numpy as np
|
|
import pytest
|
|
import torch
|
|
|
|
import mmocr.core.evaluation.utils as utils
|
|
from mmocr.utils import revert_sync_batchnorm
|
|
|
|
|
|
def _demo_mm_inputs(num_kernels=0, input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=1):  # yapf: disable
    """Create a superset of inputs needed to run test or train batches.

    All random content (images, boxes, kernels and masks) is drawn from a
    single ``RandomState`` seeded with 0, so repeated calls with the same
    arguments produce identical inputs.

    Args:
        num_kernels (int): Number of random ``(H, W)`` kernel maps generated
            for each batch item.
        input_shape (tuple): Input batch dimensions, unpacked as
            ``(N, C, H, W)``.
        num_items (None | list[int]): Specifies the number of boxes
            for each batch item. If None, a random count in ``[1, 10)`` is
            drawn per item.
        num_classes (int): Number of distinct labels a box might have.
            Currently unused: every box is given label 0.

    Returns:
        dict: Demo inputs with keys ``imgs``, ``img_metas``, ``gt_bboxes``,
            ``gt_labels``, ``gt_bboxes_ignore``, ``gt_masks``,
            ``gt_kernels``, ``gt_mask``, ``gt_thr_mask``, ``gt_text_mask``,
            ``gt_center_region_mask``, ``gt_radius_map``, ``gt_sin_map`` and
            ``gt_cos_map``. The four ``*_mask`` entries other than
            ``gt_masks`` share one list object, as do ``gt_kernels`` and the
            three ``*_map`` entries.
    """
    from mmdet.core import BitmapMasks

    (N, C, H, W) = input_shape

    rng = np.random.RandomState(0)

    imgs = rng.rand(*input_shape)

    img_metas = [{
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': np.array([1, 1, 1, 1]),
        'flip': False,
    } for _ in range(N)]

    gt_bboxes = []
    gt_labels = []
    gt_masks = []
    gt_kernels = []
    gt_effective_mask = []

    for batch_idx in range(N):
        if num_items is None:
            num_boxes = rng.randint(1, 10)
        else:
            num_boxes = num_items[batch_idx]

        # Boxes are sampled as fractional centre/size and converted to
        # corner coordinates clipped to the image extent.
        cx, cy, bw, bh = rng.rand(num_boxes, 4).T

        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)

        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        class_idxs = [0] * num_boxes

        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor(class_idxs))

        # Use the seeded generator (not the global ``np.random`` state) so
        # kernels and masks are reproducible as well.
        kernels = [rng.rand(H, W) for _ in range(num_kernels)]
        gt_kernels.append(BitmapMasks(kernels, H, W))
        gt_effective_mask.append(BitmapMasks([np.ones((H, W))], H, W))

        mask = rng.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
        gt_masks.append(BitmapMasks(mask, H, W))

    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
        'gt_kernels': gt_kernels,
        'gt_mask': gt_effective_mask,
        'gt_thr_mask': gt_effective_mask,
        'gt_text_mask': gt_effective_mask,
        'gt_center_region_mask': gt_effective_mask,
        'gt_radius_map': gt_kernels,
        'gt_sin_map': gt_kernels,
        'gt_cos_map': gt_kernels,
    }
    return mm_inputs
|
|
|
|
|
|
def _get_config_directory():
    """Find the predefined detector config directory.

    Returns:
        str: Path of the ``configs`` directory, looked up under the
            directory obtained by applying ``dirname`` three times to
            ``__file__``, or, when ``__file__`` is not defined, under the
            parent of the ``mmocr`` package directory.

    Raises:
        FileNotFoundError: If the resolved ``configs`` path does not exist.
    """
    try:
        # Assume we are running in the source mmocr repo
        repo_dpath = dirname(dirname(dirname(__file__)))
    except NameError:
        # For IPython development when this __file__ is not defined
        import mmocr
        repo_dpath = dirname(dirname(mmocr.__file__))
    config_dpath = join(repo_dpath, 'configs')
    if not exists(config_dpath):
        # FileNotFoundError is still an Exception, so existing handlers keep
        # working; the path is included to make failures diagnosable.
        raise FileNotFoundError(f'Cannot find config path: {config_dpath}')
    return config_dpath
|
|
|
|
|
|
def _get_config_module(fname):
    """Load the config file ``fname`` from the config directory.

    Args:
        fname (str): Config path relative to the directory returned by
            ``_get_config_directory``.

    Returns:
        The object produced by ``mmcv.Config.fromfile`` for that path.
    """
    from mmcv import Config

    return Config.fromfile(join(_get_config_directory(), fname))
|
|
|
|
|
|
def _get_detector_cfg(fname):
    """Return a private copy of the ``model`` section of config ``fname``.

    The section is deep copied so that a test can modify its parameters
    without influencing other tests.
    """
    return copy.deepcopy(_get_config_module(fname).model)
|
|
|
|
|
|
@pytest.mark.parametrize('cfg_file', [
    'textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
    'textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
    'textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py'
])
def test_ocr_mask_rcnn(cfg_file):
    """Smoke-test the detector built from a Mask R-CNN text config.

    Exercises the training forward pass, the inference forward pass,
    ``get_boundary`` and ``show_result``.
    """
    det_cfg = _get_detector_cfg(cfg_file)
    det_cfg['pretrained'] = None

    from mmocr.models import build_detector
    detector = build_detector(det_cfg)

    mm_inputs = _demo_mm_inputs(0, (1, 3, 224, 224))
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=mm_inputs['gt_bboxes'],
        gt_labels=mm_inputs.pop('gt_labels'),
        gt_masks=mm_inputs.pop('gt_masks'))
    assert isinstance(losses, dict)

    # Test forward test: feed the images one at a time
    with torch.no_grad():
        single_imgs = [img[None, :] for img in imgs]
        batch_results = [
            detector.forward([one_img], [[one_meta]], return_loss=False)
            for one_img, one_meta in zip(single_imgs, img_metas)
        ]

    # Test get_boundary on a hand-crafted result whose mask has a 2x2
    # block of ones in its top-left corner
    seg_mask = np.array([[1, 1, 0, 0, 0], [1, 1, 0, 0, 0], [0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]])
    boundaries = detector.get_boundary(([[[1]]], [[seg_mask]]))
    predicted = boundaries['boundary_result'][0][:-1]
    assert utils.boundary_iou(predicted, [1, 1, 0, 1, 0, 0, 1, 0]) == 1

    # Test show_result
    demo_result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    detector.show_result(np.random.rand(5, 5), demo_result)
|
|
|
|
|
|
@pytest.mark.parametrize('cfg_file', [
    'textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py',
    'textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py',
    'textdet/panet/panet_r50_fpem_ffm_600e_icdar2017.py'
])
def test_panet(cfg_file):
    """Smoke-test the detector built from a PANet config.

    Exercises the training forward pass, the inference forward pass, an
    ONNX export into a temporary directory and ``show_result``.
    """
    det_cfg = _get_detector_cfg(cfg_file)
    det_cfg['pretrained'] = None

    from mmocr.models import build_detector
    detector = revert_sync_batchnorm(build_detector(det_cfg))

    mm_inputs = _demo_mm_inputs(2, (1, 3, 224, 224))
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train
    losses = detector.forward(
        imgs,
        img_metas,
        gt_kernels=mm_inputs.pop('gt_kernels'),
        gt_mask=mm_inputs.pop('gt_mask'))
    assert isinstance(losses, dict)

    # Test forward test: feed the images one at a time
    with torch.no_grad():
        single_imgs = [img[None, :] for img in imgs]
        batch_results = [
            detector.forward([one_img], [[one_meta]], return_loss=False)
            for one_img, one_meta in zip(single_imgs, img_metas)
        ]

    # Test onnx export: forward is rebound to simple_test with the metas
    # pre-filled, so the export call only has to supply the image
    detector.forward = partial(
        detector.simple_test, img_metas=img_metas, rescale=True)
    export_options = dict(
        input_names=['input'],
        output_names=['output'],
        export_params=True,
        keep_initializers_as_inputs=False)
    with tempfile.TemporaryDirectory() as tmpdirname:
        onnx_path = f'{tmpdirname}/tmp.onnx'
        torch.onnx.export(detector, (single_imgs[0], ), onnx_path,
                          **export_options)

    # Test show result
    demo_result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    detector.show_result(np.random.rand(5, 5), demo_result)
|
|
|
|
|
|
@pytest.mark.parametrize('cfg_file', [
    'textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py',
    'textdet/psenet/psenet_r50_fpnf_600e_icdar2017.py',
    'textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py'
])
def test_psenet(cfg_file):
    """Smoke-test the detector built from a PSENet config.

    Exercises the training forward pass, the inference forward pass and
    ``show_result``.
    """
    det_cfg = _get_detector_cfg(cfg_file)
    det_cfg['pretrained'] = None

    from mmocr.models import build_detector
    detector = revert_sync_batchnorm(build_detector(det_cfg))

    mm_inputs = _demo_mm_inputs(7, (1, 3, 224, 224))
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train
    losses = detector.forward(
        imgs,
        img_metas,
        gt_kernels=mm_inputs.pop('gt_kernels'),
        gt_mask=mm_inputs.pop('gt_mask'))
    assert isinstance(losses, dict)

    # Test forward test: feed the images one at a time
    with torch.no_grad():
        single_imgs = [img[None, :] for img in imgs]
        batch_results = [
            detector.forward([one_img], [[one_meta]], return_loss=False)
            for one_img, one_meta in zip(single_imgs, img_metas)
        ]

    # Test show result
    demo_result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    detector.show_result(np.random.rand(5, 5), demo_result)
|
|
|
|
|
|
@pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
@pytest.mark.parametrize('cfg_file', [
    'textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
    'textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py'
])
def test_dbnet(cfg_file):
    """Smoke-test the detector built from a DBNet config on a CUDA device.

    Exercises the training forward pass, the inference forward pass and
    ``show_result``. Skipped when CUDA is unavailable.
    """
    det_cfg = _get_detector_cfg(cfg_file)
    det_cfg['pretrained'] = None

    from mmocr.models import build_detector
    detector = revert_sync_batchnorm(build_detector(det_cfg)).cuda()

    mm_inputs = _demo_mm_inputs(7, (1, 3, 224, 224))
    imgs = mm_inputs.pop('imgs').cuda()
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train; the generic demo targets are reused under the
    # keyword names this detector's forward is called with
    losses = detector.forward(
        imgs,
        img_metas,
        gt_shrink=mm_inputs.pop('gt_kernels'),
        gt_shrink_mask=mm_inputs.pop('gt_mask'),
        gt_thr=mm_inputs.pop('gt_masks'),
        gt_thr_mask=mm_inputs.pop('gt_thr_mask'))
    assert isinstance(losses, dict)

    # Test forward test: feed the images one at a time
    with torch.no_grad():
        single_imgs = [img[None, :] for img in imgs]
        batch_results = [
            detector.forward([one_img], [[one_meta]], return_loss=False)
            for one_img, one_meta in zip(single_imgs, img_metas)
        ]

    # Test show result
    demo_result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    detector.show_result(np.random.rand(5, 5), demo_result)
|
|
|
|
|
|
@pytest.mark.parametrize(
    'cfg_file',
    ['textdet/textsnake/'
     'textsnake_r50_fpn_unet_1200e_ctw1500.py'])
def test_textsnake(cfg_file):
    """Smoke-test the detector built from the TextSnake config.

    Exercises the training forward pass, the head's ``get_boundary`` on a
    hand-crafted prediction map and ``show_result``.
    """
    det_cfg = _get_detector_cfg(cfg_file)
    det_cfg['pretrained'] = None

    from mmocr.models import build_detector
    detector = revert_sync_batchnorm(build_detector(det_cfg))

    mm_inputs = _demo_mm_inputs(1, (1, 3, 224, 224))
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Ground-truth targets are forwarded under the same names they have in
    # the demo inputs
    target_keys = ('gt_text_mask', 'gt_center_region_mask', 'gt_mask',
                   'gt_radius_map', 'gt_sin_map', 'gt_cos_map')
    targets = {key: mm_inputs.pop(key) for key in target_keys}

    # Test forward train
    losses = detector.forward(imgs, img_metas, **targets)
    assert isinstance(losses, dict)

    # Test forward test get_boundary
    pred_maps = torch.zeros((1, 5, 224, 224), dtype=torch.float)
    pred_maps[:, 0:2, :, :] = -10.
    pred_maps[:, 0, 60:100, 12:212] = 10.
    # Channels 1-4 are filled over one shared inner rectangle
    for channel, value in ((1, 10.), (2, 0.), (3, 1.), (4, 10.)):
        pred_maps[:, channel, 70:90, 22:202] = value

    result = detector.bbox_head.get_boundary(pred_maps, [img_metas[0]],
                                             False)
    assert 'boundary_result' in result
    assert 'filename' in result

    # Test show result
    demo_result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    detector.show_result(np.random.rand(5, 5), demo_result)
|
|
|
|
|
|
@pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
@pytest.mark.parametrize('cfg_file', [
    'textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
    'textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py'
])
def test_fcenet(cfg_file):
    """Smoke-test the detector built from an FCENet config on a CUDA device.

    Exercises the training forward pass with random ``p3``/``p4``/``p5``
    target maps, the inference forward pass and ``show_result``. Skipped
    when CUDA is unavailable.
    """
    det_cfg = _get_detector_cfg(cfg_file)
    det_cfg['pretrained'] = None

    from mmocr.models import build_detector
    detector = revert_sync_batchnorm(build_detector(det_cfg)).cuda()

    fourier_degree = 5
    n, c, h, w = 1, 3, 256, 256

    imgs = torch.randn(n, c, h, w).float().cuda()
    img_metas = [
        dict([('img_shape', (h, w, c)), ('ori_shape', (h, w, c)),
              ('pad_shape', (h, w, c)), ('filename', '<demo>.png'),
              ('scale_factor', np.array([1, 1, 1, 1])), ('flip', False)])
        for _ in range(n)
    ]

    # One random target map per level and batch item; the spatial size is
    # the input size divided by 8, 16 and 32 respectively
    map_channels = 5 + 4 * fourier_degree
    p3_maps, p4_maps, p5_maps = [], [], []
    for _ in range(n):
        for level_maps, stride in ((p3_maps, 8), (p4_maps, 16),
                                   (p5_maps, 32)):
            level_maps.append(
                np.random.random((map_channels, h // stride, w // stride)))

    # Test forward train
    losses = detector.forward(
        imgs, img_metas, p3_maps=p3_maps, p4_maps=p4_maps, p5_maps=p5_maps)
    assert isinstance(losses, dict)

    # Test forward test: feed the images one at a time
    with torch.no_grad():
        single_imgs = [img[None, :] for img in imgs]
        batch_results = [
            detector.forward([one_img], [[one_meta]], return_loss=False)
            for one_img, one_meta in zip(single_imgs, img_metas)
        ]

    # Test show result
    demo_result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    detector.show_result(np.random.rand(5, 5), demo_result)
|
|
|
|
|
|
@pytest.mark.parametrize(
    'cfg_file', ['textdet/drrg/'
                 'drrg_r50_fpn_unet_1200e_ctw1500.py'])
def test_drrg(cfg_file):
    """Smoke-test the detector built from the DRRG config.

    Exercises the training forward pass with random component attributes,
    the head's ``single_test`` / ``get_boundary`` on a hand-crafted
    prediction map (using a rebuilt detector with a modified ``bbox_head``
    config) and ``show_result``.
    """
    det_cfg = _get_detector_cfg(cfg_file)
    det_cfg['pretrained'] = None

    from mmocr.models import build_detector
    detector = revert_sync_batchnorm(build_detector(det_cfg))

    mm_inputs = _demo_mm_inputs(1, (1, 3, 224, 224))
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    shared_keys = ('gt_text_mask', 'gt_center_region_mask', 'gt_mask',
                   'gt_sin_map', 'gt_cos_map')
    targets = {key: mm_inputs.pop(key) for key in shared_keys}
    # Both height maps come from the demo radius map; the bottom one is a
    # shallow copy of the top list
    targets['gt_top_height_map'] = mm_inputs.pop('gt_radius_map')
    targets['gt_bot_height_map'] = targets['gt_top_height_map'].copy()

    # Random component attributes, one row per RoI:
    # [num_rois, x, y, h, w, cos, sin, label]
    num_rois = 32
    col_shape = (num_rois, 1)
    x = np.random.randint(4, 224, col_shape)
    y = np.random.randint(4, 224, col_shape)
    h = 4 * np.ones(col_shape)
    w = 4 * np.ones(col_shape)
    angle = (np.random.random_sample(col_shape) * 2 - 1) * np.pi / 2
    cos, sin = np.cos(angle), np.sin(angle)
    comp_labels = np.random.randint(1, 3, col_shape)
    roi_count_col = num_rois * np.ones(col_shape)
    comp_attribs = np.hstack(
        [roi_count_col, x, y, h, w, cos, sin, comp_labels])
    targets['gt_comp_attribs'] = np.expand_dims(
        comp_attribs.astype(np.float32), axis=0)

    # Test forward train
    losses = detector.forward(imgs, img_metas, **targets)
    assert isinstance(losses, dict)

    # Test forward test
    head_cfg = det_cfg['bbox_head']
    head_cfg['in_channels'] = 6
    head_cfg['text_region_thr'] = 0.8
    head_cfg['center_region_thr'] = 0.8
    detector = build_detector(det_cfg)

    pred_maps = torch.zeros((1, 6, 224, 224), dtype=torch.float)
    pred_maps[:, 0:2, :, :] = -10.
    pred_maps[:, 0, 60:100, 50:170] = 10.
    # Channels 1-5 are filled over one shared inner rectangle
    for channel, value in ((1, 10.), (2, 0.), (3, 1.), (4, 10.), (5, 10.)):
        pred_maps[:, channel, 75:85, 60:160] = value

    with torch.no_grad():
        # Identity-shaped (6, 6, 1, 1) weight with zero bias; presumably
        # this lets out_conv pass the crafted maps through unchanged
        out_conv = detector.bbox_head.out_conv
        out_conv.weight.data = torch.eye(6).view(6, 6, 1, 1)
        out_conv.bias.data.fill_(0.)
        outs = detector.bbox_head.single_test(pred_maps)
        boundaries = detector.bbox_head.get_boundary(*outs, img_metas, True)
    assert len(boundaries) == 1

    # Test show result
    demo_result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    detector.show_result(np.random.rand(5, 5), demo_result)
|