[Remove] Remove unused datasets and pipelines

This commit is contained in:
liukuikun 2022-07-12 03:50:33 +00:00 committed by gaotongxiao
parent de78a8839f
commit 83aac48491
32 changed files with 9 additions and 4037 deletions

View File

@ -1,5 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import LOADERS, PARSERS
from .icdar_dataset import IcdarDataset
from .ocr_dataset import OCRDataset
from .ocr_seg_dataset import OCRSegDataset
@ -9,6 +8,6 @@ from .recog_text_dataset import RecogTextDataset
from .wildreceipt_dataset import WildReceiptDataset
__all__ = [
'IcdarDataset', 'OCRDataset', 'OCRSegDataset', 'PARSERS', 'LOADERS',
'RecogLMDBDataset', 'RecogTextDataset', 'WildReceiptDataset'
'IcdarDataset', 'OCRDataset', 'OCRSegDataset', 'RecogLMDBDataset',
'RecogTextDataset', 'WildReceiptDataset'
]

View File

@ -1,6 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmocr.registry import TRANSFORMS
# LOADERS and PARSERS are not separate registries: both names are bound to
# the very same TRANSFORMS registry object imported above.
LOADERS = TRANSFORMS
PARSERS = TRANSFORMS

View File

@ -1,7 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .formatting import PackKIEInputs, PackTextDetInputs, PackTextRecogInputs
from .loading import LoadKIEAnnotations, LoadOCRAnnotations
from .ocr_seg_targets import OCRSegTargets
from .ocr_transforms import (FancyPCA, NormalizeOCR, OnlineCropOCR,
OpencvToPil, PilToOpencv, RandomPaddingOCR,
RandomRotateImageBox, ResizeOCR, ToTensorOCR)
@ -11,20 +10,16 @@ from .processing import (BoundedScaleAspectJitter, FixInvalidPolygon,
ShortScaleAspectJitter, SourceImagePad,
TextDetRandomCrop, TextDetRandomCropFlip)
from .test_time_aug import MultiRotateAugOCR
from .textdet_targets import (DBNetTargets, FCENetTargets, PANetTargets,
TextSnakeTargets)
from .transforms import ScaleAspectJitter
from .wrappers import ImgAug, TorchVisionWrapper
__all__ = [
'LoadOCRAnnotations', 'NormalizeOCR', 'OnlineCropOCR', 'ResizeOCR',
'ToTensorOCR', 'DBNetTargets', 'PANetTargets', 'RandomRotate',
'ScaleAspectJitter', 'MultiRotateAugOCR', 'OCRSegTargets', 'FancyPCA',
'ToTensorOCR', 'RandomRotate', 'MultiRotateAugOCR', 'FancyPCA',
'RandomPaddingOCR', 'ImgAug', 'RandomRotateImageBox', 'OpencvToPil',
'PilToOpencv', 'SourceImagePad', 'TextSnakeTargets', 'FCENetTargets',
'TextDetRandomCropFlip', 'PyramidRescale', 'TorchVisionWrapper', 'Resize',
'RandomCrop', 'TextDetRandomCrop', 'RandomCrop', 'PackTextDetInputs',
'PackTextRecogInputs', 'RescaleToHeight', 'PadToWidth',
'ShortScaleAspectJitter', 'RandomFlip', 'BoundedScaleAspectJitter',
'PackKIEInputs', 'LoadKIEAnnotations', 'FixInvalidPolygon'
'PilToOpencv', 'SourceImagePad', 'TextDetRandomCropFlip', 'PyramidRescale',
'TorchVisionWrapper', 'Resize', 'RandomCrop', 'TextDetRandomCrop',
'RandomCrop', 'PackTextDetInputs', 'PackTextRecogInputs',
'RescaleToHeight', 'PadToWidth', 'ShortScaleAspectJitter', 'RandomFlip',
'BoundedScaleAspectJitter', 'PackKIEInputs', 'LoadKIEAnnotations',
'FixInvalidPolygon'
]

View File

@ -1,201 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
from mmdet.core import BitmapMasks
import mmocr.utils.check_argument as check_argument
from mmocr.registry import MODELS, TRANSFORMS
@TRANSFORMS.register_module()
class OCRSegTargets:
    """Generate gt shrunk kernels for segmentation based OCR framework.

    Args:
        label_convertor (dict): Dictionary to construct label_convertor
            to convert char to index.
        attn_shrink_ratio (float): The area shrunk ratio
            between attention kernels and gt text masks.
        seg_shrink_ratio (float): The area shrunk ratio
            between segmentation kernels and gt text masks.
        box_type (str): Character box type, should be either
            'char_rects' or 'char_quads', with 'char_rects'
            for rectangle with ``xyxy`` style and 'char_quads'
            for quadrangle with ``x1y1x2y2x3y3x4y4`` style.
        pad_val (int): Value written into the part of every kernel that
            lies to the right of the resized image width (rows inside the
            resized height only).
    """

    def __init__(self,
                 label_convertor=None,
                 attn_shrink_ratio=0.5,
                 seg_shrink_ratio=0.25,
                 box_type='char_rects',
                 pad_val=255):

        assert isinstance(attn_shrink_ratio, float)
        assert isinstance(seg_shrink_ratio, float)
        assert 0. < attn_shrink_ratio < 1.0
        assert 0. < seg_shrink_ratio < 1.0
        assert label_convertor is not None
        assert box_type in ('char_rects', 'char_quads')

        self.attn_shrink_ratio = attn_shrink_ratio
        self.seg_shrink_ratio = seg_shrink_ratio
        self.label_convertor = MODELS.build(label_convertor)
        self.box_type = box_type
        self.pad_val = pad_val

    def shrink_char_quad(self, char_quad, shrink_ratio):
        """Shrink char box in style of quadrangle.

        Every corner is moved along its two adjacent edges by
        ``shrink_ratio`` times the shorter of those two edges.

        Args:
            char_quad (list[float]): Char box with format
                [x1, y1, x2, y2, x3, y3, x4, y4].
            shrink_ratio (float): The area shrunk ratio
                between gt kernels and gt text masks.

        Returns:
            ndarray: The four shrunk corner points, shape (4, 2).
        """
        points = [[char_quad[0], char_quad[1]], [char_quad[2], char_quad[3]],
                  [char_quad[4], char_quad[5]], [char_quad[6], char_quad[7]]]
        shrink_points = []
        for p_idx, point in enumerate(points):
            # Previous and next corner of the quadrangle.
            p1 = points[(p_idx + 3) % 4]
            p2 = points[(p_idx + 1) % 4]

            dist1 = self.l2_dist_two_points(p1, point)
            dist2 = self.l2_dist_two_points(p2, point)
            min_dist = min(dist1, dist2)

            v1 = [p1[0] - point[0], p1[1] - point[1]]
            v2 = [p2[0] - point[0], p2[1] - point[1]]

            # A zero-length adjacent edge would divide by zero; in that
            # case the corner is left where it is.
            temp_dist1 = (shrink_ratio * min_dist /
                          dist1) if min_dist != 0 else 0.
            temp_dist2 = (shrink_ratio * min_dist /
                          dist2) if min_dist != 0 else 0.

            v1 = [temp * temp_dist1 for temp in v1]
            v2 = [temp * temp_dist2 for temp in v2]

            shrink_point = [
                round(point[0] + v1[0] + v2[0]),
                round(point[1] + v1[1] + v2[1])
            ]
            shrink_points.append(shrink_point)

        poly = np.array(shrink_points)

        return poly

    def shrink_char_rect(self, char_rect, shrink_ratio):
        """Shrink char box in style of rectangle.

        The rectangle keeps its center; width and height are multiplied by
        ``shrink_ratio``.

        Args:
            char_rect (list[float]): Char box with format
                [x_min, y_min, x_max, y_max].
            shrink_ratio (float): The area shrunk ratio
                between gt kernels and gt text masks.

        Returns:
            ndarray: The four corner points of the shrunk box, shape (4, 2).
        """
        x_min, y_min, x_max, y_max = char_rect
        w = x_max - x_min
        h = y_max - y_min
        x_min_s = round((x_min + x_max - w * shrink_ratio) / 2)
        y_min_s = round((y_min + y_max - h * shrink_ratio) / 2)
        x_max_s = round((x_min + x_max + w * shrink_ratio) / 2)
        y_max_s = round((y_min + y_max + h * shrink_ratio) / 2)

        poly = np.array([[x_min_s, y_min_s], [x_max_s, y_min_s],
                         [x_max_s, y_max_s], [x_min_s, y_max_s]])

        return poly

    def generate_kernels(self,
                         resize_shape,
                         pad_shape,
                         char_boxes,
                         char_inds,
                         shrink_ratio=0.5,
                         binary=True):
        """Generate char instance kernels for one shrink ratio.

        Args:
            resize_shape (tuple(int, int)): Image size (height, width)
                after resizing.
            pad_shape (tuple(int, int)): Image size (height, width)
                after padding.
            char_boxes (list[list[float]]): The list of char polygons.
            char_inds (list[int]): List of char indexes.
            shrink_ratio (float): The shrink ratio of kernel.
            binary (bool): If True, return binary ndarray
                containing 0 & 1 only.

        Returns:
            char_kernel (ndarray): The text kernel mask of (height, width).
        """
        assert isinstance(resize_shape, tuple)
        assert isinstance(pad_shape, tuple)
        assert check_argument.is_2dlist(char_boxes)
        assert check_argument.is_type_list(char_inds, int)
        assert isinstance(shrink_ratio, float)
        assert isinstance(binary, bool)

        char_kernel = np.zeros(pad_shape, dtype=np.int32)
        # Mark the horizontal padding (columns beyond the resized width).
        char_kernel[:resize_shape[0], resize_shape[1]:] = self.pad_val

        for i, char_box in enumerate(char_boxes):
            if self.box_type == 'char_rects':
                poly = self.shrink_char_rect(char_box, shrink_ratio)
            elif self.box_type == 'char_quads':
                poly = self.shrink_char_quad(char_box, shrink_ratio)

            fill_value = 1 if binary else char_inds[i]
            cv2.fillConvexPoly(char_kernel, poly.astype(np.int32),
                               (fill_value))

        return char_kernel

    def l2_dist_two_points(self, p1, p2):
        """Return the Euclidean distance between two 2-D points."""
        return ((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5

    def __call__(self, results):
        """Build ``results['gt_kernels']`` from the char annotations.

        Args:
            results (dict): Must provide ``img_shape``, ``resize_shape``,
                ``pad_shape`` and ``ann_info`` (with ``chars`` and the
                configured ``box_type`` entry).

        Returns:
            dict: ``results`` with ``gt_kernels`` (attention kernel,
            segmentation kernel and valid-region mask) and ``mask_fields``
            set.
        """
        img_shape = results['img_shape']
        resize_shape = results['resize_shape']

        h_scale = 1.0 * resize_shape[0] / img_shape[0]
        w_scale = 1.0 * resize_shape[1] / img_shape[1]

        # One box holds 2 points (rect) or 4 points (quad).
        num_points = 2 if self.box_type == 'char_rects' else 4
        raw_boxes = results['ann_info'][self.box_type]

        char_boxes, char_inds = [], []
        for i, raw_box in enumerate(raw_boxes):
            # Scale a copy: writing into the annotation itself would rescale
            # already-scaled coordinates whenever the same annotation object
            # passes through this transform again.
            char_box = list(raw_box)
            for j in range(num_points):
                char_box[j * 2] = round(raw_box[j * 2] * w_scale)
                char_box[j * 2 + 1] = round(raw_box[j * 2 + 1] * h_scale)
            char_boxes.append(char_box)

            char = results['ann_info']['chars'][i]
            char_ind = self.label_convertor.str2idx([char])[0][0]
            char_inds.append(char_ind)

        resize_shape = tuple(results['resize_shape'][:2])
        pad_shape = tuple(results['pad_shape'][:2])
        binary_target = self.generate_kernels(
            resize_shape,
            pad_shape,
            char_boxes,
            char_inds,
            shrink_ratio=self.attn_shrink_ratio,
            binary=True)

        seg_target = self.generate_kernels(
            resize_shape,
            pad_shape,
            char_boxes,
            char_inds,
            shrink_ratio=self.seg_shrink_ratio,
            binary=False)

        # 1 inside the image, 0 in the horizontal padding.
        mask = np.ones(pad_shape, dtype=np.int32)
        mask[:resize_shape[0], resize_shape[1]:] = 0

        results['gt_kernels'] = BitmapMasks([binary_target, seg_target, mask],
                                            pad_shape[0], pad_shape[1])
        results['mask_fields'] = ['gt_kernels']

        return results

View File

@ -1,13 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base_textdet_targets import BaseTextDetTargets
from .dbnet_targets import DBNetTargets
from .drrg_targets import DRRGTargets
from .fcenet_targets import FCENetTargets
from .panet_targets import PANetTargets
from .psenet_targets import PSENetTargets
from .textsnake_targets import TextSnakeTargets
__all__ = [
'BaseTextDetTargets', 'PANetTargets', 'PSENetTargets', 'DBNetTargets',
'FCENetTargets', 'TextSnakeTargets', 'DRRGTargets'
]

View File

@ -1,168 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import sys
import cv2
import numpy as np
import pyclipper
from mmcv.utils import print_log
from shapely.geometry import Polygon as plg
import mmocr.utils.check_argument as check_argument
class BaseTextDetTargets:
    """Generate text detector ground truths.

    Subclasses implement :meth:`generate_targets`; calling an instance
    forwards to it.
    """

    def __init__(self):
        pass

    def point2line(self, xs, ys, point_1, point_2):
        """Compute the distance from point to a line. This is adapted from
        https://github.com/MhLiao/DB.

        Args:
            xs (ndarray): The x coordinates of size hxw.
            ys (ndarray): The y coordinates of size hxw.
            point_1 (ndarray): The first point with shape 1x2.
            point_2 (ndarray): The second point with shape 1x2.

        Returns:
            result (ndarray): The distance matrix of size hxw.
        """
        # suppose a triangle with three edge abc with c=point_1 point_2
        # a^2
        a_square = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
        # b^2
        b_square = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
        # c^2
        c_square = np.square(point_1[0] - point_2[0]) + np.square(point_1[1] -
                                                                  point_2[1])
        # -cosC=(c^2-a^2-b^2)/2(ab)
        neg_cos_c = (
            (c_square - a_square - b_square) /
            (np.finfo(np.float32).eps + 2 * np.sqrt(a_square * b_square)))
        # sinC^2=1-cosC^2
        square_sin = 1 - np.square(neg_cos_c)
        square_sin = np.nan_to_num(square_sin)
        # Rounding may push |cosC| marginally above 1; clamp so the square
        # root below cannot yield NaN.
        square_sin = np.clip(square_sin, 0, None)
        # distance=a*b*sinC/c=a*h/c=2*area/c
        result = np.sqrt(a_square * b_square * square_sin /
                         (np.finfo(np.float32).eps + c_square))
        # set result to minimum edge if C<pi/2
        result[neg_cos_c < 0] = np.sqrt(np.fmin(a_square,
                                                b_square))[neg_cos_c < 0]
        return result

    def polygon_area(self, polygon):
        """Compute the polygon area. Please refer to Green's theorem.
        https://en.wikipedia.org/wiki/Green%27s_theorem. This is adapted from
        https://github.com/MhLiao/DB.

        Args:
            polygon (ndarray): The polygon boundary points.

        Returns:
            float: The signed area; the sign depends on the vertex order.
        """
        polygon = polygon.reshape(-1, 2)
        edge = 0
        for i in range(polygon.shape[0]):
            next_index = (i + 1) % polygon.shape[0]
            edge += (polygon[next_index, 0] - polygon[i, 0]) * (
                polygon[next_index, 1] + polygon[i, 1])

        return edge / 2.

    def polygon_size(self, polygon):
        """Estimate the height and width of the minimum bounding box of the
        polygon.

        Args:
            polygon (ndarray): The polygon point sequence.

        Returns:
            size (tuple): The two side lengths of the minimum-area bounding
            rectangle, exactly as reported by ``cv2.minAreaRect``.
        """
        poly = polygon.reshape(-1, 2)
        rect = cv2.minAreaRect(poly.astype(np.int32))
        size = rect[1]
        return size

    def generate_kernels(self,
                         img_size,
                         text_polys,
                         shrink_ratio,
                         max_shrink=sys.maxsize,
                         ignore_tags=None):
        """Generate text instance kernels for one shrink ratio.

        Args:
            img_size (tuple(int, int)): The image size of (height, width).
            text_polys (list[list[ndarray]]: The list of text polygons.
            shrink_ratio (float): The shrink ratio of kernel.
            max_shrink (int): Upper bound of the shrink distance in pixels.
            ignore_tags (list[bool], optional): If given, the entry of every
                polygon that cannot be shrunk is set to True in place.

        Returns:
            tuple(ndarray, list[bool] | None): The text kernel mask of
            (height, width), where instance ``i`` is filled with ``i + 1``,
            and the (possibly updated) ``ignore_tags``.
        """
        assert isinstance(img_size, tuple)
        assert check_argument.is_2dlist(text_polys)
        assert isinstance(shrink_ratio, float)

        h, w = img_size
        text_kernel = np.zeros((h, w), dtype=np.float32)

        for text_ind, poly in enumerate(text_polys):
            instance = poly[0].reshape(-1, 2).astype(np.int32)
            area = plg(instance).area
            peri = cv2.arcLength(instance, True)
            distance = min(
                int(area * (1 - shrink_ratio * shrink_ratio) / (peri + 0.001) +
                    0.5), max_shrink)
            pco = pyclipper.PyclipperOffset()
            pco.AddPath(instance, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)
            # Keep the raw list of paths. Wrapping it in ``np.array`` fails
            # on recent NumPy when the offset returns several paths of
            # different lengths (ragged input).
            shrunk = pco.Execute(-distance)

            # check shrunk == [] or an empty first path
            if len(shrunk) == 0 or len(shrunk[0]) == 0:
                if ignore_tags is not None:
                    ignore_tags[text_ind] = True
                continue
            try:
                shrunk = np.array(shrunk[0]).reshape(-1, 2)
            except (ValueError, TypeError) as e:
                print_log(f'{shrunk} with error {e}')
                if ignore_tags is not None:
                    ignore_tags[text_ind] = True
                continue
            cv2.fillPoly(text_kernel, [shrunk.astype(np.int32)], text_ind + 1)

        return text_kernel, ignore_tags

    def generate_effective_mask(self, mask_size: tuple, polygons_ignore):
        """Generate effective mask by setting the ineffective regions to 0 and
        effective regions to 1.

        Args:
            mask_size (tuple): The mask size.
            polygons_ignore (list[[ndarray]]: The list of ignored text
                polygons.

        Returns:
            mask (ndarray): The effective mask of (height, width).
        """
        assert check_argument.is_2dlist(polygons_ignore)

        mask = np.ones(mask_size, dtype=np.uint8)

        for poly in polygons_ignore:
            instance = poly[0].reshape(-1,
                                       2).astype(np.int32).reshape(1, -1, 2)
            cv2.fillPoly(mask, instance, 0)

        return mask

    def generate_targets(self, results):
        """Produce the targets for ``results``; must be overridden."""
        raise NotImplementedError

    def __call__(self, results):
        """Run :meth:`generate_targets` on ``results`` and return it."""
        results = self.generate_targets(results)
        return results

View File

@ -1,250 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
import pyclipper
from mmdet.core import BitmapMasks
from shapely.geometry import Polygon
from mmocr.registry import TRANSFORMS
from . import BaseTextDetTargets
@TRANSFORMS.register_module()
class DBNetTargets(BaseTextDetTargets):
    """Generate gt shrunk text, gt threshold map, and their effective region
    masks to learn DBNet: Real-time Scene Text Detection with Differentiable
    Binarization [https://arxiv.org/abs/1911.08947]. This was partially adapted
    from https://github.com/MhLiao/DB.

    Args:
        shrink_ratio (float): The area shrunk ratio between text
            kernels and their text masks.
        thr_min (float): The minimum value of the threshold map.
        thr_max (float): The maximum value of the threshold map.
        min_short_size (int): The minimum size of polygon below which
            the polygon is invalid.
    """

    def __init__(self,
                 shrink_ratio=0.4,
                 thr_min=0.3,
                 thr_max=0.7,
                 min_short_size=8):
        super().__init__()
        self.shrink_ratio = shrink_ratio
        self.thr_min = thr_min
        self.thr_max = thr_max
        self.min_short_size = min_short_size

    def find_invalid(self, results):
        """Find invalid polygons.

        Args:
            results (dict): The dict containing gt_mask.

        Returns:
            ignore_tags (list[bool]): The indicators for ignoring polygons.
        """
        texts = results['gt_masks'].masks
        ignore_tags = [False] * len(texts)

        for idx, text in enumerate(texts):
            if self.invalid_polygon(text[0]):
                ignore_tags[idx] = True
        return ignore_tags

    def invalid_polygon(self, poly):
        """Judge the input polygon is invalid or not. It is invalid if its area
        smaller than 1 or the shorter side of its minimum bounding box smaller
        than min_short_size.

        Args:
            poly (ndarray): The polygon boundary point sequence.

        Returns:
            True/False (bool): Whether the polygon is invalid.
        """
        area = self.polygon_area(poly)
        if abs(area) < 1:
            return True
        short_size = min(self.polygon_size(poly))
        if short_size < self.min_short_size:
            return True

        return False

    def ignore_texts(self, results, ignore_tags):
        """Ignore gt masks and gt_labels while padding gt_masks_ignore in
        results given ignore_tags.

        Args:
            results (dict): Result for one image.
            ignore_tags (list[bool]): Indicate whether to ignore its
                corresponding ground truth text.

        Returns:
            tuple(dict, list[bool]): Results after filtering, and the tags
            of the instances that were kept (all False).
        """
        flag_len = len(ignore_tags)
        assert flag_len == len(results['gt_masks'].masks)
        assert flag_len == len(results['gt_labels'])

        # Move the ignored instances over before dropping them from gt_masks.
        results['gt_masks_ignore'].masks += [
            mask for i, mask in enumerate(results['gt_masks'].masks)
            if ignore_tags[i]
        ]
        results['gt_masks'].masks = [
            mask for i, mask in enumerate(results['gt_masks'].masks)
            if not ignore_tags[i]
        ]
        results['gt_labels'] = np.array([
            mask for i, mask in enumerate(results['gt_labels'])
            if not ignore_tags[i]
        ])
        new_ignore_tags = [ignore for ignore in ignore_tags if not ignore]

        return results, new_ignore_tags

    def generate_thr_map(self, img_size, polygons):
        """Generate threshold map.

        Args:
            img_size (tuple(int)): The image size (h,w)
            polygons (list(ndarray)): The polygon list.

        Returns:
            thr_map (ndarray): The generated threshold map.
            thr_mask (ndarray): The effective mask of threshold map.
        """
        thr_map = np.zeros(img_size, dtype=np.float32)
        thr_mask = np.zeros(img_size, dtype=np.uint8)

        for polygon in polygons:
            self.draw_border_map(polygon[0], thr_map, mask=thr_mask)
        # Rescale the border map from [0, 1] into [thr_min, thr_max].
        thr_map = thr_map * (self.thr_max - self.thr_min) + self.thr_min

        return thr_map, thr_mask

    def draw_border_map(self, polygon, canvas, mask):
        """Generate threshold map for one polygon.

        ``canvas`` and ``mask`` are updated in place; ``polygon`` itself is
        left untouched.

        Args:
            polygon(ndarray): The polygon boundary ndarray.
            canvas(ndarray): The generated threshold map.
            mask(ndarray): The generated threshold mask.
        """
        # ``reshape`` may hand back a view, and the coordinates are shifted
        # below, so work on a private copy to keep the caller's polygon
        # intact.
        polygon = polygon.reshape(-1, 2).copy()
        assert polygon.ndim == 2
        assert polygon.shape[1] == 2

        polygon_shape = Polygon(polygon)
        distance = (
            polygon_shape.area * (1 - np.power(self.shrink_ratio, 2)) /
            polygon_shape.length)
        subject = [tuple(p) for p in polygon]
        padding = pyclipper.PyclipperOffset()
        padding.AddPath(subject, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)
        padded_polygon = padding.Execute(distance)
        if len(padded_polygon) > 0:
            padded_polygon = np.array(padded_polygon[0])
        else:
            print(f'padding {polygon} with {distance} gets {padded_polygon}')
            padded_polygon = polygon.copy().astype(np.int32)

        x_min = padded_polygon[:, 0].min()
        x_max = padded_polygon[:, 0].max()
        y_min = padded_polygon[:, 1].min()
        y_max = padded_polygon[:, 1].max()

        width = x_max - x_min + 1
        height = y_max - y_min + 1

        # Nothing to draw when the padded box misses the canvas completely.
        # The right/bottom cases must be rejected as well: otherwise the
        # slices computed below become negative and either mismatch in
        # shape or copy unrelated values into the last column/row.
        canvas_h, canvas_w = canvas.shape[:2]
        if (x_max < 0 or y_max < 0 or x_min >= canvas_w
                or y_min >= canvas_h):
            return

        # Move the polygon into the local frame of its padded bounding box.
        polygon[:, 0] = polygon[:, 0] - x_min
        polygon[:, 1] = polygon[:, 1] - y_min

        xs = np.broadcast_to(
            np.linspace(0, width - 1, num=width).reshape(1, width),
            (height, width))
        ys = np.broadcast_to(
            np.linspace(0, height - 1, num=height).reshape(height, 1),
            (height, width))

        # Normalised distance of every pixel to each polygon edge.
        distance_map = np.zeros((polygon.shape[0], height, width),
                                dtype=np.float32)
        for i in range(polygon.shape[0]):
            j = (i + 1) % polygon.shape[0]
            absolute_distance = self.point2line(xs, ys, polygon[i], polygon[j])
            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
        distance_map = distance_map.min(axis=0)

        # Part of the bounding box that actually lies on the canvas.
        x_min_valid = min(max(0, x_min), canvas_w - 1)
        x_max_valid = min(max(0, x_max), canvas_w - 1)
        y_min_valid = min(max(0, y_min), canvas_h - 1)
        y_max_valid = min(max(0, y_max), canvas_h - 1)

        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

        canvas_rows = slice(y_min_valid, y_max_valid + 1)
        canvas_cols = slice(x_min_valid, x_max_valid + 1)
        local_rows = slice(y_min_valid - y_min, y_max_valid - y_max + height)
        local_cols = slice(x_min_valid - x_min, x_max_valid - x_max + width)
        # Keep the larger border response where polygons overlap.
        canvas[canvas_rows, canvas_cols] = np.fmax(
            1 - distance_map[local_rows, local_cols],
            canvas[canvas_rows, canvas_cols])

    def generate_targets(self, results):
        """Generate the gt targets for DBNet.

        Args:
            results (dict): The input result dictionary.

        Returns:
            results (dict): The output result dictionary.
        """
        assert isinstance(results, dict)

        if 'bbox_fields' in results:
            results['bbox_fields'].clear()

        ignore_tags = self.find_invalid(results)
        results, ignore_tags = self.ignore_texts(results, ignore_tags)

        h, w, _ = results['img_shape']
        polygons = results['gt_masks'].masks

        # generate gt_shrink_kernel
        gt_shrink, ignore_tags = self.generate_kernels((h, w),
                                                       polygons,
                                                       self.shrink_ratio,
                                                       ignore_tags=ignore_tags)

        results, ignore_tags = self.ignore_texts(results, ignore_tags)
        # generate gt_shrink_mask
        polygons_ignore = results['gt_masks_ignore'].masks
        gt_shrink_mask = self.generate_effective_mask((h, w), polygons_ignore)

        # generate gt_threshold and gt_threshold_mask
        polygons = results['gt_masks'].masks
        gt_thr, gt_thr_mask = self.generate_thr_map((h, w), polygons)

        results['mask_fields'].clear()  # rm gt_masks encoded by polygons
        results.pop('gt_labels', None)
        results.pop('gt_masks', None)
        results.pop('gt_bboxes', None)
        results.pop('gt_bboxes_ignore', None)

        mapping = {
            'gt_shrink': gt_shrink,
            'gt_shrink_mask': gt_shrink_mask,
            'gt_thr': gt_thr,
            'gt_thr_mask': gt_thr_mask
        }
        for key, value in mapping.items():
            value = value if isinstance(value, list) else [value]
            results[key] = BitmapMasks(value, h, w)
            results['mask_fields'].append(key)

        return results

View File

@ -1,534 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
from lanms import merge_quadrangle_n9 as la_nms
from mmdet.core import BitmapMasks
from numpy.linalg import norm
import mmocr.utils.check_argument as check_argument
from mmocr.registry import TRANSFORMS
from .textsnake_targets import TextSnakeTargets
@TRANSFORMS.register_module()
class DRRGTargets(TextSnakeTargets):
"""Generate the ground truth targets of DRRG: Deep Relational Reasoning
Graph Network for Arbitrary Shape Text Detection.
[https://arxiv.org/abs/2003.07493]. This code was partially adapted from
https://github.com/GXYM/DRRG licensed under the MIT license.
Args:
orientation_thr (float): The threshold for distinguishing between
head edge and tail edge among the horizontal and vertical edges
of a quadrangle.
resample_step (float): The step size for resampling the text center
line.
num_min_comps (int): The minimum number of text components, which
should be larger than k_hop1 mentioned in paper.
num_max_comps (int): The maximum number of text components.
min_width (float): The minimum width of text components.
max_width (float): The maximum width of text components.
center_region_shrink_ratio (float): The shrink ratio of text center
regions.
comp_shrink_ratio (float): The shrink ratio of text components.
comp_w_h_ratio (float): The width to height ratio of text components.
min_rand_half_height(float): The minimum half-height of random text
components.
max_rand_half_height (float): The maximum half-height of random
text components.
jitter_level (float): The jitter level of text component geometric
features.
"""
def __init__(self,
             orientation_thr=2.0,
             resample_step=8.0,
             num_min_comps=9,
             num_max_comps=600,
             min_width=8.0,
             max_width=24.0,
             center_region_shrink_ratio=0.3,
             comp_shrink_ratio=1.0,
             comp_w_h_ratio=0.3,
             text_comp_nms_thr=0.25,
             min_rand_half_height=8.0,
             max_rand_half_height=24.0,
             jitter_level=0.2):
    """Store every hyper-parameter on the instance under its own name."""
    super().__init__()

    # Center-line handling.
    self.orientation_thr = orientation_thr
    self.resample_step = resample_step
    self.center_region_shrink_ratio = center_region_shrink_ratio

    # Text component count and geometry limits.
    self.num_min_comps = num_min_comps
    self.num_max_comps = num_max_comps
    self.min_width = min_width
    self.max_width = max_width
    self.comp_shrink_ratio = comp_shrink_ratio
    self.comp_w_h_ratio = comp_w_h_ratio
    self.text_comp_nms_thr = text_comp_nms_thr

    # Random component sampling and jitter.
    self.min_rand_half_height = min_rand_half_height
    self.max_rand_half_height = max_rand_half_height
    self.jitter_level = jitter_level
def dist_point2line(self, point, line):
    """Distance from one or more 2-D points to the line through two points.

    Args:
        point (ndarray): A single point of shape (2,) or a batch of points
            of shape (N, 2).
        line (tuple(ndarray, ndarray)): Two points on the line.

    Returns:
        ndarray | float: The perpendicular distance(s); one value per
        input point.
    """
    assert isinstance(line, tuple)
    point1, point2 = line
    direction = point2 - point1
    offset = point - point1
    # Explicit z-component of the 2-D cross product. ``np.cross`` on
    # 2-element vectors is deprecated as of NumPy 2.0.
    cross_z = (direction[..., 0] * offset[..., 1] -
               direction[..., 1] * offset[..., 0])
    # The small epsilon guards against a degenerate (zero-length) line.
    d = abs(cross_z) / (norm(direction) + 1e-8)
    return d
def draw_center_region_maps(self, top_line, bot_line, center_line,
                            center_region_mask, top_height_map,
                            bot_height_map, sin_map, cos_map,
                            region_shrink_ratio):
    """Draw attributes of text components on text center regions.

    All five maps are modified in place, one quadrangle per pair of
    neighbouring center-line points.

    Args:
        top_line (ndarray): The points composing the top side lines of text
            polygons.
        bot_line (ndarray): The points composing bottom side lines of text
            polygons.
        center_line (ndarray): The points composing the center lines of
            text instances.
        center_region_mask (ndarray): The text center region mask.
        top_height_map (ndarray): The map on which the distance from points
            to top side lines will be drawn for each pixel in text center
            regions.
        bot_height_map (ndarray): The map on which the distance from points
            to bottom side lines will be drawn for each pixel in text
            center regions.
        sin_map (ndarray): The map of vector_sin(top_point - bot_point)
            that will be drawn on text center regions.
        cos_map (ndarray): The map of vector_cos(top_point - bot_point)
            will be drawn on text center regions.
        region_shrink_ratio (float): The shrink ratio of text center
            regions.
    """
    assert top_line.shape == bot_line.shape == center_line.shape
    assert (center_region_mask.shape == top_height_map.shape ==
            bot_height_map.shape == sin_map.shape == cos_map.shape)
    assert isinstance(region_shrink_ratio, float)

    map_h, map_w = center_region_mask.shape

    def _toward_center(side_pt, center_pt):
        # Pull a side-line point towards its center-line point.
        return center_pt + (side_pt - center_pt) * region_shrink_ratio

    for seg in range(0, len(center_line) - 1):
        nxt = seg + 1

        # Orientation of this segment, taken across the text line.
        top_mid = (top_line[seg] + top_line[nxt]) / 2
        bot_mid = (bot_line[seg] + bot_line[nxt]) / 2
        sin_theta = self.vector_sin(top_mid - bot_mid)
        cos_theta = self.vector_cos(top_mid - bot_mid)

        # Shrunk quadrangle in tl, tr, br, bl order.
        quad = np.vstack([
            _toward_center(top_line[seg], center_line[seg]),
            _toward_center(top_line[nxt], center_line[nxt]),
            _toward_center(bot_line[nxt], center_line[nxt]),
            _toward_center(bot_line[seg], center_line[seg]),
        ]).astype(np.int32)

        cv2.fillPoly(center_region_mask, [quad], color=1)
        cv2.fillPoly(sin_map, [quad], color=sin_theta)
        cv2.fillPoly(cos_map, [quad], color=cos_theta)

        # The height maps need per-pixel values, so rasterise the (clipped)
        # quadrangle inside its own bounding box to list its pixels.
        quad[:, 0] = np.clip(quad[:, 0], 0, map_w - 1)
        quad[:, 1] = np.clip(quad[:, 1], 0, map_h - 1)
        lower = np.min(quad, axis=0).astype(np.int32)
        upper = np.max(quad, axis=0).astype(np.int32)
        local_quad = quad - lower
        extent = (upper - lower + 1)
        patch = np.zeros((extent[1], extent[0]), dtype=np.uint8)
        cv2.fillPoly(patch, [local_quad], color=1)

        # Patch (row, col) indices -> map indices -> (x, y) points.
        rows_cols = np.argwhere(patch > 0)
        rows_cols = rows_cols + (lower[1], lower[0])
        points_xy = np.fliplr(rows_cols)
        pixel_index = (rows_cols[:, 0], rows_cols[:, 1])

        top_height_map[pixel_index] = self.dist_point2line(
            points_xy, (top_line[seg], top_line[nxt]))
        bot_height_map[pixel_index] = self.dist_point2line(
            points_xy, (bot_line[seg], bot_line[nxt]))
def generate_center_mask_attrib_maps(self, img_size, text_polys):
    """Generate text center region masks and geometric attribute maps.

    Args:
        img_size (tuple): The image size (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons.

    Returns:
        center_lines (list): The list of text center lines.
        center_region_mask (ndarray): The text center region mask.
        top_height_map (ndarray): The map on which the distance from points
            to top side lines will be drawn for each pixel in text center
            regions.
        bot_height_map (ndarray): The map on which the distance from points
            to bottom side lines will be drawn for each pixel in text
            center regions.
        sin_map (ndarray): The sin(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
        cos_map (ndarray): The cos(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    h, w = img_size
    center_region_mask = np.zeros((h, w), np.uint8)
    top_height_map = np.zeros((h, w), dtype=np.float32)
    bot_height_map = np.zeros((h, w), dtype=np.float32)
    sin_map = np.zeros((h, w), dtype=np.float32)
    cos_map = np.zeros((h, w), dtype=np.float32)
    center_lines = []

    for poly in text_polys:
        assert len(poly) == 1
        vertices = poly[0].reshape(-1, 2)
        _, _, top_line, bot_line = self.reorder_poly_edge(vertices)
        top_pts, bot_pts = self.resample_sidelines(top_line, bot_line,
                                                   self.resample_step)
        bot_pts = bot_pts[::-1]
        center_line = (top_pts + bot_pts) / 2

        # Give every line a canonical direction: steep lines run downwards
        # (increasing y), the others run rightwards (increasing x).
        if self.vector_slope(center_line[-1] - center_line[0]) > 2:
            backwards = (center_line[-1] - center_line[0])[1] < 0
        else:
            backwards = (center_line[-1] - center_line[0])[0] < 0
        if backwards:
            center_line = center_line[::-1]
            top_pts = top_pts[::-1]
            bot_pts = bot_pts[::-1]

        # Half of the (clipped) component width is cut off at both ends.
        head_shrink_len = np.clip(
            (norm(top_line[0] - bot_line[0]) * self.comp_w_h_ratio),
            self.min_width, self.max_width) / 2
        tail_shrink_len = np.clip(
            (norm(top_line[-1] - bot_line[-1]) * self.comp_w_h_ratio),
            self.min_width, self.max_width) / 2
        num_head_shrink = int(head_shrink_len // self.resample_step)
        num_tail_shrink = int(tail_shrink_len // self.resample_step)

        # Trim only when enough points remain afterwards.
        if len(center_line) > num_head_shrink + num_tail_shrink + 2:
            center_line = center_line[num_head_shrink:len(center_line) -
                                      num_tail_shrink]
            top_pts = top_pts[num_head_shrink:len(top_pts) - num_tail_shrink]
            bot_pts = bot_pts[num_head_shrink:len(bot_pts) - num_tail_shrink]

        center_lines.append(center_line.astype(np.int32))
        self.draw_center_region_maps(top_pts, bot_pts, center_line,
                                     center_region_mask, top_height_map,
                                     bot_height_map, sin_map, cos_map,
                                     self.center_region_shrink_ratio)

    return (center_lines, center_region_mask, top_height_map,
            bot_height_map, sin_map, cos_map)
def generate_rand_comp_attribs(self, num_rand_comps, center_sample_mask):
    """Generate random text components and their attributes to ensure the
    the number of text components in an image is larger than k_hop1, which
    is the number of one hop neighbors in KNN graph.

    Args:
        num_rand_comps (int): The number of random text components.
        center_sample_mask (ndarray): The region mask for sampling text
            component centers .

    Returns:
        rand_comp_attribs (ndarray): The random text component attributes
        (x, y, h, w, cos, sin, comp_label=0).
    """
    assert isinstance(num_rand_comps, int)
    assert num_rand_comps > 0
    assert center_sample_mask.ndim == 2

    h, w = center_sample_mask.shape
    short_side = min(h, w)

    def _margin_for(half_height):
        # Half diagonal (plus one) of the largest component that can be
        # drawn for the given half height.
        full_height = half_height * 2
        full_width = np.clip(full_height * self.comp_w_h_ratio,
                             self.min_width, self.max_width)
        return int(
            np.sqrt((full_height / 2)**2 + (full_width / 2)**2)) + 1

    max_rand_half_height = self.max_rand_half_height
    min_rand_half_height = self.min_rand_half_height
    margin = _margin_for(max_rand_half_height)

    if 2 * margin + 1 > short_side:
        # The configured heights do not fit into this image; derive
        # smaller ones from the image size instead.
        assert short_side > (np.sqrt(2) * (self.min_width + 1))
        max_rand_half_height = max(short_side / 4, self.min_width / 2 + 1)
        min_rand_half_height = max(max_rand_half_height / 4,
                                   self.min_width / 2)
        margin = _margin_for(max_rand_half_height)

    # Centers may only be sampled away from the border ...
    inner_center_sample_mask = np.zeros_like(center_sample_mask)
    inner_center_sample_mask[margin:h - margin, margin:w - margin] = \
        center_sample_mask[margin:h - margin, margin:w - margin]
    # ... and away from the edge of the sampling region itself.
    kernel_size = int(np.clip(max_rand_half_height, 7, 21))
    erode_kernel = np.ones((kernel_size, kernel_size), np.uint8)
    inner_center_sample_mask = cv2.erode(inner_center_sample_mask,
                                         erode_kernel)

    center_candidates = np.argwhere(inner_center_sample_mask > 0)
    picked = np.random.choice(len(center_candidates), num_rand_comps)
    rand_centers = center_candidates[picked]
    column = (len(rand_centers), 1)

    rand_top_height = np.random.randint(
        min_rand_half_height, max_rand_half_height, size=column)
    rand_bot_height = np.random.randint(
        min_rand_half_height, max_rand_half_height, size=column)

    # Random orientation, normalised to unit length.
    rand_cos = 2 * np.random.random(size=column) - 1
    rand_sin = 2 * np.random.random(size=column) - 1
    scale = np.sqrt(1.0 / (rand_cos**2 + rand_sin**2 + 1e-8))
    rand_cos = rand_cos * scale
    rand_sin = rand_sin * scale

    height = (rand_top_height + rand_bot_height)
    width = np.clip(height * self.comp_w_h_ratio, self.min_width,
                    self.max_width)

    # Candidates are (row, col); reverse the columns to get (x, y).
    rand_comp_attribs = np.hstack([
        rand_centers[:, ::-1], height, width, rand_cos, rand_sin,
        np.zeros_like(rand_sin)
    ]).astype(np.float32)

    return rand_comp_attribs
def jitter_comp_attribs(self, comp_attribs, jitter_level):
    """Jitter text components attributes.

    The centre, size and orientation of every component are perturbed with
    uniform random noise proportional to ``jitter_level``; afterwards the
    (cos, sin) pair is rescaled back to (almost exactly) unit length. The
    caller's array is not modified.

    Args:
        comp_attribs (ndarray): The text component attributes, one row per
            component and 7 columns (x, y, h, w, cos, sin, comp_label).
        jitter_level (float): The jitter level of text components
            attributes.

    Returns:
        jittered_comp_attribs (ndarray): The jittered text component
            attributes (x, y, h, w, cos, sin, comp_label).
    """
    assert comp_attribs.shape[1] == 7
    assert comp_attribs.shape[0] > 0
    assert isinstance(jitter_level, float)

    # The column slices taken below can be views, so the in-place ``+=``
    # updates would write through to the caller's array. Jitter a private
    # copy instead; the returned values are unaffected by this.
    comp_attribs = np.array(comp_attribs, copy=True)
    num_comps = len(comp_attribs)

    def centered_noise():
        """Draw one uniform sample in [-0.5, 0.5) per component."""
        return np.random.random(size=(num_comps, 1)) - 0.5

    x = comp_attribs[:, 0].reshape((-1, 1))
    y = comp_attribs[:, 1].reshape((-1, 1))
    h = comp_attribs[:, 2].reshape((-1, 1))
    w = comp_attribs[:, 3].reshape((-1, 1))
    cos = comp_attribs[:, 4].reshape((-1, 1))
    sin = comp_attribs[:, 5].reshape((-1, 1))
    comp_labels = comp_attribs[:, 6].reshape((-1, 1))

    # The centre offsets are computed from the un-jittered h and w, so x
    # and y must be updated before h and w are.
    x += centered_noise() * (h * np.abs(cos) + w * np.abs(sin)) * jitter_level
    y += centered_noise() * (h * np.abs(sin) + w * np.abs(cos)) * jitter_level

    h += centered_noise() * h * jitter_level
    w += centered_noise() * w * jitter_level

    cos += centered_noise() * 2 * jitter_level
    sin += centered_noise() * 2 * jitter_level

    # Re-normalise the orientation vector; 1e-8 guards against a zero norm.
    scale = np.sqrt(1.0 / (cos**2 + sin**2 + 1e-8))
    cos = cos * scale
    sin = sin * scale

    jittered_comp_attribs = np.hstack([x, y, h, w, cos, sin, comp_labels])
    return jittered_comp_attribs
def generate_comp_attribs(self, center_lines, text_mask,
                          center_region_mask, top_height_map,
                          bot_height_map, sin_map, cos_map):
    """Generate text component attributes.

    Candidate components are placed on the text center lines, filtered
    with ``la_nms``, jittered, topped up with random components when fewer
    than ``self.num_min_comps`` remain, and finally written into a
    zero-padded array with ``self.num_max_comps`` rows.

    Args:
        center_lines (list[ndarray]): The list of text center lines .
        text_mask (ndarray): The text region mask.
        center_region_mask (ndarray): The text center region mask.
        top_height_map (ndarray): The map on which the distance from points
            to top side lines will be drawn for each pixel in text center
            regions.
        bot_height_map (ndarray): The map on which the distance from points
            to bottom side lines will be drawn for each pixel in text
            center regions.
        sin_map (ndarray): The sin(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
        cos_map (ndarray): The cos(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).

    Returns:
        pad_comp_attribs (ndarray): The padded text component attributes
            of a fixed size. Each used row holds
            (num_comps, x, y, h, w, cos, sin, comp_label); unused rows are
            all zeros.
    """
    assert isinstance(center_lines, list)
    assert (text_mask.shape == center_region_mask.shape ==
            top_height_map.shape == bot_height_map.shape == sin_map.shape
            == cos_map.shape)
    # Draw the center lines and keep only the pixels that also lie inside
    # the center region; these pixels are the candidate component centers.
    center_lines_mask = np.zeros_like(center_region_mask)
    cv2.polylines(center_lines_mask, center_lines, 0, 1, 1)
    center_lines_mask = center_lines_mask * center_region_mask
    # np.argwhere returns (row, col) pairs, i.e. (y, x).
    comp_centers = np.argwhere(center_lines_mask > 0)
    y = comp_centers[:, 0]
    x = comp_centers[:, 1]
    # Heights used for the candidate boxes are shrunk by comp_shrink_ratio.
    top_height = top_height_map[y, x].reshape(
        (-1, 1)) * self.comp_shrink_ratio
    bot_height = bot_height_map[y, x].reshape(
        (-1, 1)) * self.comp_shrink_ratio
    sin = sin_map[y, x].reshape((-1, 1))
    cos = cos_map[y, x].reshape((-1, 1))
    # Both mid points are still in (y, x) order here.
    top_mid_points = comp_centers + np.hstack(
        [top_height * sin, top_height * cos])
    bot_mid_points = comp_centers - np.hstack(
        [bot_height * sin, bot_height * cos])
    width = (top_height + bot_height) * self.comp_w_h_ratio
    width = np.clip(width, self.min_width, self.max_width)
    r = width / 2
    # ``[:, ::-1]`` flips (y, x) to (x, y) before building the four corners.
    tl = top_mid_points[:, ::-1] - np.hstack([-r * sin, r * cos])
    tr = top_mid_points[:, ::-1] + np.hstack([-r * sin, r * cos])
    br = bot_mid_points[:, ::-1] + np.hstack([-r * sin, r * cos])
    bl = bot_mid_points[:, ::-1] - np.hstack([-r * sin, r * cos])
    # One row per candidate: 8 corner coordinates followed by a score of 1.
    text_comps = np.hstack([tl, tr, br, bl]).astype(np.float32)
    score = np.ones((text_comps.shape[0], 1), dtype=np.float32)
    text_comps = np.hstack([text_comps, score])
    # NOTE(review): la_nms is defined outside this view; presumably a
    # locality-aware NMS that thins out overlapping candidates - confirm.
    text_comps = la_nms(text_comps, self.text_comp_nms_thr)
    if text_comps.shape[0] >= 1:
        # Clamp the surviving corners to the image so that the derived
        # centers are valid indices into the attribute maps.
        img_h, img_w = center_region_mask.shape
        text_comps[:, 0:8:2] = np.clip(text_comps[:, 0:8:2], 0, img_w - 1)
        text_comps[:, 1:8:2] = np.clip(text_comps[:, 1:8:2], 0, img_h - 1)
        # Component center = mean of its four corners, truncated to int.
        comp_centers = np.mean(
            text_comps[:, 0:8].reshape((-1, 4, 2)),
            axis=1).astype(np.int32)
        x = comp_centers[:, 0]
        y = comp_centers[:, 1]
        # Unlike the candidate boxes above, the stored height is read from
        # the maps without applying comp_shrink_ratio.
        height = (top_height_map[y, x] + bot_height_map[y, x]).reshape(
            (-1, 1))
        width = np.clip(height * self.comp_w_h_ratio, self.min_width,
                        self.max_width)
        cos = cos_map[y, x].reshape((-1, 1))
        sin = sin_map[y, x].reshape((-1, 1))
        # Label every component with the id of the connected center region
        # its center falls in.
        _, comp_label_mask = cv2.connectedComponents(
            center_region_mask, connectivity=8)
        comp_labels = comp_label_mask[y, x].reshape(
            (-1, 1)).astype(np.float32)
        x = x.reshape((-1, 1)).astype(np.float32)
        y = y.reshape((-1, 1)).astype(np.float32)
        comp_attribs = np.hstack(
            [x, y, height, width, cos, sin, comp_labels])
        comp_attribs = self.jitter_comp_attribs(comp_attribs,
                                                self.jitter_level)
        # Top up with random components sampled outside the text mask
        # until at least num_min_comps components exist.
        if comp_attribs.shape[0] < self.num_min_comps:
            num_rand_comps = self.num_min_comps - comp_attribs.shape[0]
            rand_comp_attribs = self.generate_rand_comp_attribs(
                num_rand_comps, 1 - text_mask)
            comp_attribs = np.vstack([comp_attribs, rand_comp_attribs])
    else:
        # Nothing survived NMS: fall back to random components only.
        comp_attribs = self.generate_rand_comp_attribs(
            self.num_min_comps, 1 - text_mask)
    # Prepend the component count as column 0 of every row.
    num_comps = (
        np.ones((comp_attribs.shape[0], 1), dtype=np.float32) *
        comp_attribs.shape[0])
    comp_attribs = np.hstack([num_comps, comp_attribs])
    # Truncate to num_max_comps rows and fix up the stored count.
    if comp_attribs.shape[0] > self.num_max_comps:
        comp_attribs = comp_attribs[:self.num_max_comps, :]
        comp_attribs[:, 0] = self.num_max_comps
    # Zero-pad so that the result always has num_max_comps rows.
    pad_comp_attribs = np.zeros(
        (self.num_max_comps, comp_attribs.shape[1]), dtype=np.float32)
    pad_comp_attribs[:comp_attribs.shape[0], :] = comp_attribs
    return pad_comp_attribs
def generate_targets(self, results):
    """Generate the gt targets for DRRG.

    Replaces the polygon masks registered in ``results['mask_fields']``
    with dense target maps and stores the component attributes under
    ``results['gt_comp_attribs']``.

    Args:
        results (dict): The input result dictionary.

    Returns:
        results (dict): The output result dictionary.
    """
    assert isinstance(results, dict)

    text_polys = results['gt_masks'].masks
    ignored_polys = results['gt_masks_ignore'].masks
    img_h, img_w, _ = results['img_shape']
    img_size = (img_h, img_w)

    gt_text_mask = self.generate_text_region_mask(img_size, text_polys)
    gt_mask = self.generate_effective_mask(img_size, ignored_polys)
    center_maps = self.generate_center_mask_attrib_maps(img_size, text_polys)
    (center_lines, gt_center_region_mask, gt_top_height_map,
     gt_bot_height_map, gt_sin_map, gt_cos_map) = center_maps
    gt_comp_attribs = self.generate_comp_attribs(
        center_lines, gt_text_mask, gt_center_region_mask,
        gt_top_height_map, gt_bot_height_map, gt_sin_map, gt_cos_map)

    # Drop the polygon-encoded gt_masks from the mask field registry
    # before registering the dense maps.
    results['mask_fields'].clear()
    dense_targets = (
        ('gt_text_mask', gt_text_mask),
        ('gt_center_region_mask', gt_center_region_mask),
        ('gt_mask', gt_mask),
        ('gt_top_height_map', gt_top_height_map),
        ('gt_bot_height_map', gt_bot_height_map),
        ('gt_sin_map', gt_sin_map),
        ('gt_cos_map', gt_cos_map),
    )
    for field, target in dense_targets:
        masks = target if isinstance(target, list) else [target]
        results[field] = BitmapMasks(masks, img_h, img_w)
        results['mask_fields'].append(field)

    results['gt_comp_attribs'] = gt_comp_attribs
    return results

View File

@ -1,361 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
from numpy.fft import fft
from numpy.linalg import norm
import mmocr.utils.check_argument as check_argument
from mmocr.registry import TRANSFORMS
from .textsnake_targets import TextSnakeTargets
@TRANSFORMS.register_module()
class FCENetTargets(TextSnakeTargets):
"""Generate the ground truth targets of FCENet: Fourier Contour Embedding
for Arbitrary-Shaped Text Detection.
[https://arxiv.org/abs/2104.10442]
Args:
fourier_degree (int): The maximum Fourier transform degree k.
resample_step (float): The step size for resampling the text center
line (TCL). It's better not to exceed half of the minimum width.
center_region_shrink_ratio (float): The shrink ratio of text center
region.
level_size_divisors (tuple(int)): The downsample ratio on each level.
level_proportion_range (tuple(tuple(int))): The range of text sizes
assigned to each level.
"""
def __init__(self,
             fourier_degree=5,
             resample_step=4.0,
             center_region_shrink_ratio=0.3,
             level_size_divisors=(8, 16, 32),
             level_proportion_range=((0, 0.4), (0.3, 0.7), (0.6, 1.0))):
    """Validate and store the FCENet target-generation settings.

    The two per-level settings must both be tuples and must describe the
    same number of levels.
    """
    super().__init__()

    for per_level_setting in (level_size_divisors, level_proportion_range):
        assert isinstance(per_level_setting, tuple)
    num_levels = len(level_size_divisors)
    assert num_levels == len(level_proportion_range)

    self.fourier_degree = fourier_degree
    self.resample_step = resample_step
    self.center_region_shrink_ratio = center_region_shrink_ratio
    self.level_size_divisors = level_size_divisors
    self.level_proportion_range = level_proportion_range
def generate_center_region_mask(self, img_size, text_polys):
    """Generate text center region mask.

    For every polygon the two sidelines are resampled, the head and tail
    of the resulting center line are trimmed, and the quadrilaterals
    spanned by consecutive (shrunk) sideline points are filled with 1.

    Args:
        img_size (tuple): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons.

    Returns:
        center_region_mask (ndarray): The text center region mask.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    center_region_mask = np.zeros((height, width), np.uint8)
    quads = []

    for poly in text_polys:
        assert len(poly) == 1
        contour = poly[0].reshape(-1, 2)
        _, _, top_side, bot_side = self.reorder_poly_edge(contour)
        top_pts, bot_pts = self.resample_sidelines(top_side, bot_side,
                                                   self.resample_step)
        # Reverse the bottom sideline so that index i on both sidelines
        # can be paired when averaging.
        bot_pts = bot_pts[::-1]
        mid_pts = (top_pts + bot_pts) / 2

        # Trim a quarter of the local text height (in resample steps)
        # from each end of the center line.
        head_gap = norm(top_pts[0] - bot_pts[0]) / 4.0
        tail_gap = norm(top_pts[-1] - bot_pts[-1]) / 4.0
        num_head_drop = int(head_gap // self.resample_step)
        num_tail_drop = int(tail_gap // self.resample_step)
        # Only trim when more than two points would remain.
        if len(mid_pts) > num_head_drop + num_tail_drop + 2:
            mid_pts = mid_pts[num_head_drop:len(mid_pts) - num_tail_drop]
            top_pts = top_pts[num_head_drop:len(top_pts) - num_tail_drop]
            bot_pts = bot_pts[num_head_drop:len(bot_pts) - num_tail_drop]

        # Pull both sidelines towards the center line.
        ratio = self.center_region_shrink_ratio
        shrunk_top = mid_pts + (top_pts - mid_pts) * ratio
        shrunk_bot = mid_pts + (bot_pts - mid_pts) * ratio

        for left in range(len(mid_pts) - 1):
            right = left + 1
            quad = np.vstack([
                shrunk_top[left], shrunk_top[right], shrunk_bot[right],
                shrunk_bot[left]
            ])
            quads.append(quad.astype(np.int32))

    cv2.fillPoly(center_region_mask, quads, 1)
    return center_region_mask
def resample_polygon(self, polygon, n=400):
    """Resample one polygon with about n points on its boundary.

    Every edge receives a share of ``n`` proportional to its length,
    truncated to an integer, so the result can contain fewer than ``n``
    points. Edges whose share truncates to zero contribute no point.

    Args:
        polygon (ndarray): The input polygon, one (x, y) vertex per row.
        n (int): The number of resampled points.

    Returns:
        resampled_polygon (ndarray): The resampled polygon.
    """
    num_vertices = len(polygon)
    # Edge i joins vertex i to vertex i + 1, wrapping back to vertex 0.
    edges = [(polygon[i], polygon[(i + 1) % num_vertices])
             for i in range(num_vertices)]
    edge_lengths = [((start[0] - end[0])**2 + (start[1] - end[1])**2)**0.5
                    for start, end in edges]
    perimeter = sum(edge_lengths)

    # 1e-8 keeps the division finite for a degenerate polygon.
    points_per_edge = (np.array(edge_lengths) / (perimeter + 1e-8)) * n
    points_per_edge = points_per_edge.astype(np.int32)

    resampled = []
    for (start, end), count in zip(edges, points_per_edge):
        if count == 0:
            continue
        step = (end - start) / count
        # The edge's end point is not emitted here; it is the start
        # point of the next edge.
        resampled.extend(start + step * k for k in range(count))

    return np.array(resampled)
def normalize_polygon(self, polygon):
    """Rotate the point order of a polygon to a canonical start point.

    Among the (at most) 8 points whose x coordinate is closest to the
    polygon's mean x, the one with the smallest y becomes the first
    point. The cyclic order of the points is preserved.

    Args:
        polygon (ndarray): The origin polygon, one (x, y) point per row.

    Returns:
        new_polygon (ndarray): The same points, cyclically shifted so that
            the selected point comes first.
    """
    centered = polygon - polygon.mean(axis=0)
    nearest_to_mean_x = np.argsort(np.abs(centered[:, 0]))[:8]
    start = nearest_to_mean_x[np.argmin(centered[nearest_to_mean_x, 1])]
    return np.concatenate([polygon[start:], polygon[:start]])
def poly2fourier(self, polygon, fourier_degree):
    """Perform Fourier transformation to generate Fourier coefficients ck
    from polygon.

    The points are treated as complex numbers x + iy, transformed with an
    FFT normalised by the number of points, and the coefficients
    c_{-K}, ..., c_0, ..., c_{K} are returned in that order.

    Args:
        polygon (ndarray): An input polygon, one (x, y) point per row.
        fourier_degree (int): The maximum Fourier degree K.

    Returns:
        c (ndarray(complex)): Fourier coefficients.
    """
    complex_points = polygon[:, 0] + polygon[:, 1] * 1j
    spectrum = fft(complex_points) / len(complex_points)
    # Negative frequencies sit at the tail of the FFT output.
    negative_part = spectrum[-fourier_degree:]
    non_negative_part = spectrum[:fourier_degree + 1]
    return np.hstack((negative_part, non_negative_part))
def clockwise(self, c, fourier_degree):
    """Make sure the polygon reconstructed from Fourier coefficients c in
    the clockwise direction.

    The coefficient order is reversed whenever the degree -1 coefficient
    has a larger magnitude than the degree +1 coefficient. When the two
    magnitudes are equal, the degree +/-2 coefficients decide, and the
    order is reversed unless the degree +2 magnitude is strictly larger.

    Args:
        c (ndarray(complex)): Fourier coefficients laid out as
            c_{-K}, ..., c_0, ..., c_{K}, i.e. c_0 is at index K.
        fourier_degree (int): The maximum Fourier degree K.

    Returns:
        c (ndarray(complex)): The coefficients, possibly in reversed order.
    """
    k = fourier_degree
    plus_one = np.abs(c[k + 1])
    minus_one = np.abs(c[k - 1])
    if plus_one > minus_one:
        return c
    if plus_one < minus_one:
        return c[::-1]
    # First-order magnitudes are equal: fall back to the second order.
    keep_order = np.abs(c[k + 2]) > np.abs(c[k - 2])
    return c if keep_order else c[::-1]
def cal_fourier_signature(self, polygon, fourier_degree):
    """Calculate Fourier signature from input polygon.

    The polygon is resampled, its start point normalised, transformed to
    Fourier coefficients, and the coefficient order is fixed with
    :meth:`clockwise`.

    Args:
        polygon (ndarray): The input polygon.
        fourier_degree (int): The maximum Fourier degree K.

    Returns:
        fourier_signature (ndarray): An array shaped (2k+1, 2) containing
            real part and imaginary part of 2k+1 Fourier coefficients.
    """
    contour = self.normalize_polygon(self.resample_polygon(polygon))
    coeffs = self.clockwise(
        self.poly2fourier(contour, fourier_degree), fourier_degree)

    # Column 0 holds the real parts, column 1 the imaginary parts.
    columns = [part.reshape((-1, 1))
               for part in (np.real(coeffs), np.imag(coeffs))]
    return np.hstack(columns)
def generate_fourier_maps(self, img_size, text_polys):
    """Generate Fourier coefficient maps.

    Every pixel inside a text polygon stores that polygon's Fourier
    signature. The degree-0 channel stores the coefficient relative to
    the pixel position (coefficient minus x for the real map, minus y for
    the imaginary map). Polygons drawn later overwrite earlier ones where
    they overlap.

    Args:
        img_size (tuple): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons.

    Returns:
        fourier_real_map (ndarray): The Fourier coefficient real part maps.
        fourier_image_map (ndarray): The Fourier coefficient imaginary part
            maps.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    degree = self.fourier_degree
    num_channels = degree * 2 + 1
    real_map = np.zeros((num_channels, height, width), dtype=np.float32)
    imag_map = np.zeros((num_channels, height, width), dtype=np.float32)

    for poly in text_polys:
        assert len(poly) == 1
        flat = poly[0]
        vertices = [[flat[j], flat[j + 1]] for j in range(0, len(flat), 2)]
        polygon = np.array(vertices).reshape((1, -1, 2))

        inside = np.zeros((height, width), dtype=np.uint8)
        cv2.fillPoly(inside, polygon.astype(np.int32), 1)
        outside = 1 - inside
        signature = self.cal_fourier_signature(polygon[0], degree)

        for channel in range(num_channels):
            if channel == degree:
                # Degree 0: store the offset from each pixel to the
                # coefficient instead of the coefficient itself.
                rows, cols = np.nonzero(inside > 0.5)
                real_map[degree, rows, cols] = signature[degree, 0] - cols
                imag_map[degree, rows, cols] = signature[degree, 1] - rows
                continue
            real_map[channel, :, :] = (
                inside * signature[channel, 0] +
                outside * real_map[channel, :, :])
            imag_map[channel, :, :] = (
                inside * signature[channel, 1] +
                outside * imag_map[channel, :, :])

    return real_map, imag_map
def generate_level_targets(self, img_size, text_polys, ignore_polys):
    """Generate ground truth target on each level.

    A polygon is assigned to every level whose proportion range strictly
    contains ``max(box_h, box_w) / image_height``; its coordinates are
    divided by that level's size divisor. Because ranges may overlap, a
    polygon can land on several levels, and a polygon outside all ranges
    is dropped.

    Args:
        img_size (list[int]): Shape of input image, as (height, width).
        text_polys (list[list[ndarray]]): A list of ground truth polygons.
        ignore_polys (list[list[ndarray]]): A list of ignored polygons.

    Returns:
        level_maps (list(ndarray)): A list of ground target on each level.
            Each entry concatenates, along axis 0, the text region mask,
            the center region mask, the effective mask, the Fourier real
            maps and the Fourier imaginary maps.
    """
    h, w = img_size
    lv_size_divs = self.level_size_divisors
    lv_proportion_range = self.level_proportion_range

    def split_by_level(polys):
        """Bucket polygons per level and rescale them to that level."""
        lv_polys = [[] for _ in range(len(lv_size_divs))]
        for poly in polys:
            assert len(poly) == 1
            flat = poly[0]
            points = [[flat[i], flat[i + 1]]
                      for i in range(0, len(flat), 2)]
            # ``np.int`` was removed in NumPy 1.24 (AttributeError); use
            # an explicit 32-bit integer dtype for the OpenCV call.
            polygon = np.array(points, dtype=np.int32).reshape((1, -1, 2))
            _, _, box_w, box_h = cv2.boundingRect(polygon)
            # The proportion is measured against the image height only.
            proportion = max(box_h, box_w) / (h + 1e-8)

            for ind, proportion_range in enumerate(lv_proportion_range):
                if proportion_range[0] < proportion < proportion_range[1]:
                    lv_polys[ind].append([flat / lv_size_divs[ind]])
        return lv_polys

    lv_text_polys = split_by_level(text_polys)
    lv_ignore_polys = split_by_level(ignore_polys)

    level_maps = []
    for ind, size_divisor in enumerate(lv_size_divs):
        level_img_size = (h // size_divisor, w // size_divisor)

        # ``[None]`` adds a leading channel axis to the 2-D masks so they
        # can be concatenated with the multi-channel Fourier maps.
        text_region = self.generate_text_region_mask(
            level_img_size, lv_text_polys[ind])[None]
        center_region = self.generate_center_region_mask(
            level_img_size, lv_text_polys[ind])[None]
        effective_mask = self.generate_effective_mask(
            level_img_size, lv_ignore_polys[ind])[None]
        fourier_real_map, fourier_image_maps = self.generate_fourier_maps(
            level_img_size, lv_text_polys[ind])

        level_maps.append(
            np.concatenate([
                text_region, center_region, effective_mask,
                fourier_real_map, fourier_image_maps
            ]))

    return level_maps
def generate_targets(self, results):
    """Generate the ground truth targets for FCENet.

    The first three level maps are stored under ``p3_maps``, ``p4_maps``
    and ``p5_maps``, and ``results['mask_fields']`` is emptied.

    Args:
        results (dict): The input result dictionary.

    Returns:
        results (dict): The output result dictionary.
    """
    assert isinstance(results, dict)

    text_polys = results['gt_masks'].masks
    ignored_polys = results['gt_masks_ignore'].masks
    img_h, img_w, _ = results['img_shape']

    level_maps = self.generate_level_targets((img_h, img_w), text_polys,
                                             ignored_polys)

    # Drop the polygon-encoded gt_masks from the mask field registry.
    results['mask_fields'].clear()
    named_levels = (
        ('p3_maps', level_maps[0]),
        ('p4_maps', level_maps[1]),
        ('p5_maps', level_maps[2]),
    )
    for field, level_map in named_levels:
        results[field] = level_map

    return results

View File

@ -1,65 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core import BitmapMasks
from mmocr.registry import TRANSFORMS
from . import BaseTextDetTargets
@TRANSFORMS.register_module()
class PANetTargets(BaseTextDetTargets):
    """Generate the ground truths for PANet: Efficient and Accurate Arbitrary-
    Shaped Text Detection with Pixel Aggregation Network.

    [https://arxiv.org/abs/1908.05900]. This code is partially adapted from
    https://github.com/WenmuZhou/PAN.pytorch.

    Args:
        shrink_ratio (tuple[float]): The ratios for shrinking text instances.
        max_shrink (int): The maximum shrink distance.
    """

    # Annotation keys removed from ``results`` once the kernels are built.
    _STALE_KEYS = ('gt_labels', 'gt_masks', 'gt_bboxes', 'gt_bboxes_ignore')

    def __init__(self, shrink_ratio=(1.0, 0.5), max_shrink=20):
        self.shrink_ratio = shrink_ratio
        self.max_shrink = max_shrink

    def generate_targets(self, results):
        """Generate the gt targets for PANet.

        One kernel mask is produced per entry of ``self.shrink_ratio``,
        plus an effective mask built from the ignored polygons. The
        original polygon/bbox annotations are removed from ``results``.

        Args:
            results (dict): The input result dictionary.

        Returns:
            results (dict): The output result dictionary.
        """
        assert isinstance(results, dict)

        text_polys = results['gt_masks'].masks
        ignored_polys = results['gt_masks_ignore'].masks
        img_h, img_w, _ = results['img_shape']
        img_size = (img_h, img_w)

        gt_kernels = []
        for ratio in self.shrink_ratio:
            kernel, _ = self.generate_kernels(
                img_size,
                text_polys,
                ratio,
                max_shrink=self.max_shrink,
                ignore_tags=None)
            gt_kernels.append(kernel)
        gt_mask = self.generate_effective_mask(img_size, ignored_polys)

        # Drop the polygon-encoded gt_masks from the mask field registry.
        results['mask_fields'].clear()
        if 'bbox_fields' in results:
            results['bbox_fields'].clear()
        for stale_key in self._STALE_KEYS:
            results.pop(stale_key, None)

        for field, target in (('gt_kernels', gt_kernels),
                              ('gt_mask', gt_mask)):
            masks = target if isinstance(target, list) else [target]
            results[field] = BitmapMasks(masks, img_h, img_w)
            results['mask_fields'].append(field)

        return results

View File

@ -1,22 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmocr.registry import TRANSFORMS
from . import PANetTargets
@TRANSFORMS.register_module()
class PSENetTargets(PANetTargets):
    """Generate the ground truth targets of PSENet: Shape robust text detection
    with progressive scale expansion network.

    [https://arxiv.org/abs/1903.12473]. This code is partially adapted from
    https://github.com/whai362/PSENet.

    The class differs from :class:`PANetTargets` only in the default value
    of ``shrink_ratio`` (seven ratios instead of two); target generation
    itself is inherited unchanged.

    Args:
        shrink_ratio(tuple(float)): The ratios for shrinking text instances.
        max_shrink(int): The maximum shrinking distance.
    """

    def __init__(self,
                 shrink_ratio=(1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4),
                 max_shrink=20):
        # Both settings are forwarded unchanged to the parent constructor.
        super().__init__(shrink_ratio=shrink_ratio, max_shrink=max_shrink)

View File

@ -1,496 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
from mmdet.core import BitmapMasks
from numpy.linalg import norm
import mmocr.utils.check_argument as check_argument
from mmocr.registry import TRANSFORMS
from . import BaseTextDetTargets
@TRANSFORMS.register_module()
class TextSnakeTargets(BaseTextDetTargets):
"""Generate the ground truth targets of TextSnake: TextSnake: A Flexible
Representation for Detecting Text of Arbitrary Shapes.
[https://arxiv.org/abs/1807.01544]. This was partially adapted from
https://github.com/princewang1994/TextSnake.pytorch.
Args:
orientation_thr (float): The threshold for distinguishing between
head edge and tail edge among the horizontal and vertical edges
of a quadrangle.
"""
def __init__(self,
             orientation_thr=2.0,
             resample_step=4.0,
             center_region_shrink_ratio=0.3):
    """Store the TextSnake target-generation settings.

    Args:
        orientation_thr (float): The threshold for distinguishing between
            head edge and tail edge among the horizontal and vertical edges
            of a quadrangle.
        resample_step (float): The step size passed to
            ``resample_sidelines`` when resampling the two sidelines.
        center_region_shrink_ratio (float): Presumably the shrink ratio of
            the text center region; its use lies outside this view.
    """
    super().__init__()
    self.orientation_thr = orientation_thr
    self.resample_step = resample_step
    self.center_region_shrink_ratio = center_region_shrink_ratio
    # Small constant added to denominators (vector norms, slopes,
    # interpolation weights) to avoid division by zero.
    self.eps = 1e-8
def vector_angle(self, vec1, vec2):
    """Return the angle in radians between two (batches of) vectors.

    Args:
        vec1 (ndarray): A single vector, or one vector per row.
        vec2 (ndarray): A single vector, or one vector per row.

    Returns:
        ndarray | float: arccos of the dot product of the normalised
            vectors, taken along the last axis.
    """

    def to_unit(vec):
        """Scale vec to unit length; eps avoids division by zero."""
        length = norm(vec, axis=-1) + self.eps
        if vec.ndim > 1:
            # Column shape so that each row is divided by its own length.
            length = length.reshape((-1, 1))
        return vec / length

    cos_angle = np.sum(to_unit(vec1) * to_unit(vec2), axis=-1)
    # Clip to the arccos domain to absorb rounding error.
    return np.arccos(np.clip(cos_angle, -1.0, 1.0))
def vector_slope(self, vec):
    """Return the absolute slope |y / (x + eps)| of a 2-D vector."""
    assert len(vec) == 2
    run = vec[0] + self.eps
    rise = vec[1]
    return abs(rise / run)
def vector_sin(self, vec):
    """Return y / (|vec| + eps) for a 2-D vector."""
    assert len(vec) == 2
    length = norm(vec) + self.eps
    return vec[1] / length
def vector_cos(self, vec):
    """Return x / (|vec| + eps) for a 2-D vector."""
    assert len(vec) == 2
    length = norm(vec) + self.eps
    return vec[0] / length
def find_head_tail(self, points, orientation_thr):
    """Find the head edge and tail edge of a text polygon.

    Polygons with more than four points are handled by scoring every edge
    and picking the best-scoring (head, tail) pair; quadrangles are
    handled by comparing the summed lengths of their two edge pairs.

    Args:
        points (ndarray): The points composing a text polygon.
        orientation_thr (float): The threshold for distinguishing between
            head edge and tail edge among the horizontal and vertical edges
            of a quadrangle.

    Returns:
        head_inds (list): The indexes of two points composing head edge.
        tail_inds (list): The indexes of two points composing tail edge.
    """
    assert points.ndim == 2
    assert points.shape[0] >= 4
    assert points.shape[1] == 2
    assert isinstance(orientation_thr, float)
    if len(points) > 4:
        # Close the polygon so that edge i runs from point i to i + 1.
        pad_points = np.vstack([points, points[0]])
        edge_vec = pad_points[1:] - pad_points[:-1]
        theta_sum = []
        adjacent_vec_theta = []
        for i, edge_vec1 in enumerate(edge_vec):
            # Indices of the previous and next edge (cyclic).
            adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
            adjacent_edge_vec = edge_vec[adjacent_ind]
            # Sum of the angles between this edge and its two neighbours.
            temp_theta_sum = np.sum(
                self.vector_angle(edge_vec1, adjacent_edge_vec))
            # Angle between the two neighbouring edges themselves.
            temp_adjacent_theta = self.vector_angle(
                adjacent_edge_vec[0], adjacent_edge_vec[1])
            theta_sum.append(temp_theta_sum)
            adjacent_vec_theta.append(temp_adjacent_theta)
        theta_sum_score = np.array(theta_sum) / np.pi
        adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
        poly_center = np.mean(points, axis=0)
        # Per edge: the larger distance of its two end points to the
        # polygon center, normalised by the maximum over all edges.
        edge_dist = np.maximum(
            norm(pad_points[1:] - poly_center, axis=-1),
            norm(pad_points[:-1] - poly_center, axis=-1))
        dist_score = edge_dist / (np.max(edge_dist) + self.eps)
        position_score = np.zeros(len(edge_vec))
        # Weighted combination of the three per-edge scores.
        score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
        score += 0.35 * dist_score
        # For an even point count, favour the middle edge and the last
        # (closing) edge.
        if len(points) % 2 == 0:
            position_score[(len(score) // 2 - 1)] += 1
            position_score[-1] += 1
        score += 0.1 * position_score
        # Duplicate the scores so that cyclic windows can be sliced.
        pad_score = np.concatenate([score, score])
        # score_matrix[i, j]: joint score of edge i as head and the edge
        # j + 2 positions after it as tail.
        score_matrix = np.zeros((len(score), len(score) - 3))
        x = np.arange(len(score) - 3) / float(len(score) - 4)
        # Gaussian weight peaking at the middle tail offset, scaled so
        # that its maximum is 1.
        gaussian = 1. / (np.sqrt(2. * np.pi) * 0.5) * np.exp(-np.power(
            (x - 0.5) / 0.5, 2.) / 2)
        gaussian = gaussian / np.max(gaussian)
        for i in range(len(score)):
            score_matrix[i, :] = score[i] + pad_score[
                (i + 2):(i + len(score) - 1)] * gaussian * 0.3
        head_start, tail_increment = np.unravel_index(
            score_matrix.argmax(), score_matrix.shape)
        tail_start = (head_start + tail_increment + 2) % len(points)
        head_end = (head_start + 1) % len(points)
        tail_end = (tail_start + 1) % len(points)
        # Order the pair so that the head edge ends at the smaller index.
        if head_end > tail_end:
            head_start, tail_start = tail_start, head_start
            head_end, tail_end = tail_end, head_end
        head_inds = [head_start, head_end]
        tail_inds = [tail_start, tail_end]
    else:
        # Quadrangle: the edge pair with the smaller summed slope is
        # treated as horizontal, the other pair as vertical.
        if self.vector_slope(points[1] - points[0]) + self.vector_slope(
                points[3] - points[2]) < self.vector_slope(
                    points[2] - points[1]) + self.vector_slope(points[0] -
                                                               points[3]):
            horizontal_edge_inds = [[0, 1], [2, 3]]
            vertical_edge_inds = [[3, 0], [1, 2]]
        else:
            horizontal_edge_inds = [[3, 0], [1, 2]]
            vertical_edge_inds = [[0, 1], [2, 3]]
        vertical_len_sum = norm(points[vertical_edge_inds[0][0]] -
                                points[vertical_edge_inds[0][1]]) + norm(
                                    points[vertical_edge_inds[1][0]] -
                                    points[vertical_edge_inds[1][1]])
        horizontal_len_sum = norm(
            points[horizontal_edge_inds[0][0]] -
            points[horizontal_edge_inds[0][1]]) + norm(
                points[horizontal_edge_inds[1][0]] -
                points[horizontal_edge_inds[1][1]])
        # Head/tail are the horizontal edges only when the vertical edges
        # are more than orientation_thr times longer in total.
        if vertical_len_sum > horizontal_len_sum * orientation_thr:
            head_inds = horizontal_edge_inds[0]
            tail_inds = horizontal_edge_inds[1]
        else:
            head_inds = vertical_edge_inds[0]
            tail_inds = vertical_edge_inds[1]
    return head_inds, tail_inds
def reorder_poly_edge(self, points):
    """Get the respective points composing head edge, tail edge, top
    sideline and bottom sideline.

    The polygon is cut at the head and tail edges into two sidelines; the
    sideline whose mean y is smaller is returned as the top sideline.

    Args:
        points (ndarray): The points composing a text polygon.

    Returns:
        head_edge (ndarray): The two points composing the head edge of text
            polygon.
        tail_edge (ndarray): The two points composing the tail edge of text
            polygon.
        top_sideline (ndarray): The points composing top curved sideline of
            text polygon.
        bot_sideline (ndarray): The points composing bottom curved sideline
            of text polygon.
    """
    assert points.ndim == 2
    assert points.shape[0] >= 4
    assert points.shape[1] == 2

    head_inds, tail_inds = self.find_head_tail(points, self.orientation_thr)
    head_edge = points[head_inds]
    tail_edge = points[tail_inds]

    num_points = len(points)
    # Two copies back to back let a sideline wrap past the last point.
    doubled = np.vstack([points, points])

    first_start = head_inds[1]
    # A tail edge ending at index 0 is addressed as index num_points so
    # that the first sideline slice is not empty.
    split = tail_inds[1] if not tail_inds[1] < 1 else num_points

    first_side = doubled[first_start:split]
    second_side = doubled[split:(first_start + num_points)]

    mean_shift = np.mean(first_side, axis=0) - np.mean(second_side, axis=0)
    if mean_shift[1] > 0:
        top_sideline, bot_sideline = second_side, first_side
    else:
        top_sideline, bot_sideline = first_side, second_side

    return head_edge, tail_edge, top_sideline, bot_sideline
def cal_curve_length(self, line):
    """Calculate the length of each edge on the discrete curve and the sum.

    Args:
        line (ndarray): The points composing a discrete curve, one (x, y)
            point per row.

    Returns:
        tuple: Returns (edges_length, total_length).

        - | edge_length (ndarray): The length of each edge on the
            discrete curve.
        - | total_length (float): The total length of the discrete
            curve.
    """
    assert line.ndim == 2
    assert len(line) >= 2

    dx = line[1:, 0] - line[:-1, 0]
    dy = line[1:, 1] - line[:-1, 1]
    edges_length = np.sqrt(dx**2 + dy**2)
    return edges_length, np.sum(edges_length)
def resample_line(self, line, n):
    """Resample n points on a line.

    The first and last input points are kept; the n - 2 points in between
    are placed at equal arc-length spacing by linear interpolation along
    the input segments.

    Args:
        line (ndarray): The points composing a line.
        n (int): The resampled points number.

    Returns:
        resampled_line (ndarray): The points composing the resampled line.
    """
    assert line.ndim == 2
    assert line.shape[0] >= 2
    assert line.shape[1] == 2
    assert isinstance(n, int)
    assert n > 2

    seg_lengths, curve_length = self.cal_curve_length(line)
    # Cumulative arc length at every input point, starting from 0.
    cum_dist = np.insert(np.cumsum(seg_lengths), 0, 0)
    spacing = curve_length / (n - 1)
    targets = np.arange(1, n - 1, dtype=np.float32) * spacing

    last_seg = len(seg_lengths) - 1
    seg = 0
    resampled = [line[0]]
    for dist in targets:
        # Targets are increasing, so the segment search never restarts.
        while seg < last_seg and dist > cum_dist[seg + 1]:
            seg += 1
        seg_start, seg_end = cum_dist[seg], cum_dist[seg + 1]
        # Interpolation weights of the segment's two end points; eps
        # guards against zero-length segments.
        weights = np.array([seg_end - dist, dist - seg_start],
                           dtype=np.float32) / (
                               seg_end - seg_start + self.eps)
        resampled.append(np.dot(weights, line[[seg, seg + 1]]))
    resampled.append(line[-1])

    return np.vstack(resampled)
def resample_sidelines(self, sideline1, sideline2, resample_step):
    """Resample two sidelines to be of the same points number according to
    step size.

    The shared point count is derived from the mean length of the two
    sidelines divided by ``resample_step`` and is never smaller than 3.

    Args:
        sideline1 (ndarray): The points composing a sideline of a text
            polygon.
        sideline2 (ndarray): The points composing another sideline of a
            text polygon.
        resample_step (float): The resampled step size.

    Returns:
        resampled_line1 (ndarray): The resampled line 1.
        resampled_line2 (ndarray): The resampled line 2.
    """
    assert sideline1.ndim == sideline2.ndim == 2
    assert sideline1.shape[1] == sideline2.shape[1] == 2
    assert sideline1.shape[0] >= 2
    assert sideline2.shape[0] >= 2
    assert isinstance(resample_step, float)

    lengths = [self.cal_curve_length(side)[1]
               for side in (sideline1, sideline2)]
    mean_length = (lengths[0] + lengths[1]) / 2
    num_points = max(int(float(mean_length) / resample_step) + 1, 3)

    resampled_line1 = self.resample_line(sideline1, num_points)
    resampled_line2 = self.resample_line(sideline2, num_points)
    return resampled_line1, resampled_line2
def draw_center_region_maps(self, top_line, bot_line, center_line,
                            center_region_mask, radius_map, sin_map,
                            cos_map, region_shrink_ratio):
    """Draw attributes on text center region.

    For every pair of consecutive center-line points, a quadrilateral
    between the shrunk sidelines is filled on all four maps. The maps are
    modified in place and nothing is returned.

    Args:
        top_line (ndarray): The points composing top curved sideline of
            text polygon.
        bot_line (ndarray): The points composing bottom curved sideline
            of text polygon.
        center_line (ndarray): The points composing the center line of text
            instance.
        center_region_mask (ndarray): The text center region mask.
        radius_map (ndarray): The map where half the distance between the
            top and bottom sideline mid points will be drawn for each
            pixel in text center region.
        sin_map (ndarray): The map where vector_sin(theta) will be drawn
            on text center regions. Theta is the angle between tangent
            line and vector (1, 0).
        cos_map (ndarray): The map where vector_cos(theta) will be drawn on
            text center regions. Theta is the angle between tangent line
            and vector (1, 0).
        region_shrink_ratio (float): The shrink ratio of text center.
    """
    assert top_line.shape == bot_line.shape == center_line.shape
    assert (center_region_mask.shape == radius_map.shape == sin_map.shape
            == cos_map.shape)
    assert isinstance(region_shrink_ratio, float)

    def shrink_to_center(side_point, center_point):
        """Move a sideline point towards its center-line point."""
        return center_point + (side_point -
                               center_point) * region_shrink_ratio

    for start in range(0, len(center_line) - 1):
        end = start + 1

        top_mid = (top_line[start] + top_line[end]) / 2
        bot_mid = (bot_line[start] + bot_line[end]) / 2
        radius = norm(top_mid - bot_mid) / 2

        direction = center_line[end] - center_line[start]
        sin_theta = self.vector_sin(direction)
        cos_theta = self.vector_cos(direction)

        # Corners in the order top-start, top-end, bottom-end,
        # bottom-start; truncated to integer pixel coordinates.
        box = np.vstack([
            shrink_to_center(top_line[start], center_line[start]),
            shrink_to_center(top_line[end], center_line[end]),
            shrink_to_center(bot_line[end], center_line[end]),
            shrink_to_center(bot_line[start], center_line[start]),
        ]).astype(np.int32)

        for canvas, value in ((center_region_mask, 1),
                              (sin_map, sin_theta),
                              (cos_map, cos_theta),
                              (radius_map, radius)):
            cv2.fillPoly(canvas, [box], color=value)
def generate_center_mask_attrib_maps(self, img_size, text_polys):
    """Build the text center region mask and its geometric attribute maps.

    Args:
        img_size (tuple): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Every
            entry must hold exactly one flat sequence of alternating
            x, y coordinates.

    Returns:
        tuple(ndarray): ``(center_region_mask, radius_map, sin_map,
        cos_map)``, each of shape (height, width). The mask is ``uint8``
        and the three attribute maps are ``float32``; all of them are
        filled in by ``self.draw_center_region_maps``.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    center_region_mask = np.zeros((height, width), np.uint8)
    radius_map = np.zeros((height, width), dtype=np.float32)
    sin_map = np.zeros((height, width), dtype=np.float32)
    cos_map = np.zeros((height, width), dtype=np.float32)

    for poly in text_polys:
        assert len(poly) == 1
        coords = poly[0]
        point_pairs = [[coords[k], coords[k + 1]]
                       for k in range(0, len(coords), 2)]
        polygon_points = np.array(point_pairs).reshape(-1, 2)

        # Keep only the vertices that do not (almost) coincide with their
        # successor on the closed polygon.
        num_points = len(polygon_points)
        keep_inds = [
            k for k in range(num_points)
            if norm(polygon_points[k] -
                    polygon_points[(k + 1) % num_points]) > 1e-5
        ]
        polygon_points = polygon_points[keep_inds]

        _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
        resampled_top_line, resampled_bot_line = self.resample_sidelines(
            top_line, bot_line, self.resample_step)
        resampled_bot_line = resampled_bot_line[::-1]
        center_line = (resampled_top_line + resampled_bot_line) / 2

        # Orient the lines so the center line runs along the positive
        # y axis when ``vector_slope`` exceeds 0.9, and along the
        # positive x axis otherwise.
        if self.vector_slope(center_line[-1] - center_line[0]) > 0.9:
            needs_flip = (center_line[-1] - center_line[0])[1] < 0
        else:
            needs_flip = (center_line[-1] - center_line[0])[0] < 0
        if needs_flip:
            center_line = center_line[::-1]
            resampled_top_line = resampled_top_line[::-1]
            resampled_bot_line = resampled_bot_line[::-1]

        # Trim a quarter of the end-cap width (in resample steps) off
        # each end, provided enough samples remain afterwards.
        head_len = norm(resampled_top_line[0] - resampled_bot_line[0]) / 4.0
        tail_len = norm(resampled_top_line[-1] -
                        resampled_bot_line[-1]) / 4.0
        head_shrink_num = int(head_len // self.resample_step)
        tail_shrink_num = int(tail_len // self.resample_step)
        if len(center_line) > head_shrink_num + tail_shrink_num + 2:
            # The three lines have equal length, so one slice fits all.
            kept = slice(head_shrink_num,
                         len(center_line) - tail_shrink_num)
            center_line = center_line[kept]
            resampled_top_line = resampled_top_line[kept]
            resampled_bot_line = resampled_bot_line[kept]

        self.draw_center_region_maps(resampled_top_line, resampled_bot_line,
                                     center_line, center_region_mask,
                                     radius_map, sin_map, cos_map,
                                     self.center_region_shrink_ratio)

    return center_region_mask, radius_map, sin_map, cos_map
def generate_text_region_mask(self, img_size, text_polys):
    """Rasterize every text polygon into one binary text region mask.

    Args:
        img_size (tuple): The image size (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Every
            entry must hold exactly one flat sequence of alternating
            x, y coordinates.

    Returns:
        text_region_mask (ndarray): The ``uint8`` text region mask of
        shape (height, width); pixels inside any polygon are set to 1.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    text_region_mask = np.zeros((height, width), dtype=np.uint8)

    for poly in text_polys:
        assert len(poly) == 1
        coords = poly[0]
        vertices = [[coords[k], coords[k + 1]]
                    for k in range(0, len(coords), 2)]
        # fillPoly receives a (1, num_points, 2) int32 array here.
        polygon = np.array(vertices, dtype=np.int32).reshape((1, -1, 2))
        cv2.fillPoly(text_region_mask, polygon, 1)

    return text_region_mask
def generate_targets(self, results):
    """Generate the ground-truth targets for TextSnake.

    Args:
        results (dict): The input result dictionary. ``gt_masks``,
            ``gt_masks_ignore``, ``img_shape`` and ``mask_fields`` are
            read from it.

    Returns:
        results (dict): The same dictionary, with ``mask_fields`` reset
        and six ``BitmapMasks`` targets added and registered in it.
    """
    assert isinstance(results, dict)

    polygon_masks = results['gt_masks'].masks
    polygon_masks_ignore = results['gt_masks_ignore'].masks
    height, width, _ = results['img_shape']
    img_size = (height, width)

    gt_text_mask = self.generate_text_region_mask(img_size, polygon_masks)
    gt_mask = self.generate_effective_mask(img_size, polygon_masks_ignore)
    center_targets = self.generate_center_mask_attrib_maps(
        img_size, polygon_masks)
    (gt_center_region_mask, gt_radius_map, gt_sin_map,
     gt_cos_map) = center_targets

    results['mask_fields'].clear()  # rm gt_masks encoded by polygons

    targets = (
        ('gt_text_mask', gt_text_mask),
        ('gt_center_region_mask', gt_center_region_mask),
        ('gt_mask', gt_mask),
        ('gt_radius_map', gt_radius_map),
        ('gt_sin_map', gt_sin_map),
        ('gt_cos_map', gt_cos_map),
    )
    for key, value in targets:
        masks = value if isinstance(value, list) else [value]
        results[key] = BitmapMasks(masks, height, width)
        results['mask_fields'].append(key)

    return results

View File

@ -1,97 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import math
import numpy as np
from mmdet.datasets.pipelines.transforms import Resize
from mmocr.registry import TRANSFORMS
from mmocr.utils import check_argument
@TRANSFORMS.register_module()
class ScaleAspectJitter(Resize):
    """Resize image and segmentation mask encoded by coordinates.

    Allowed resize types are `around_min_img_scale`, `long_short_bound`, and
    `indep_sample_in_range`. This class only decides the target scale (see
    ``_random_scale``); everything else is inherited from ``Resize``.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=False,
                 resize_type='around_min_img_scale',
                 aspect_ratio_range=None,
                 long_size_bound=None,
                 short_size_bound=None,
                 scale_range=None):
        """Validate the argument combination for the chosen resize type.

        ``indep_sample_in_range`` requires ``scale_range`` and forbids the
        ratio/aspect/bound arguments. The other two types forbid
        ``scale_range`` and require tuple ``ratio_range`` and
        ``aspect_ratio_range``; ``long_short_bound`` additionally requires
        both size bounds. ``keep_ratio`` must be falsy.
        """
        super().__init__(
            img_scale=img_scale,
            multiscale_mode=multiscale_mode,
            ratio_range=ratio_range,
            keep_ratio=keep_ratio)
        assert not keep_ratio
        assert resize_type in [
            'around_min_img_scale', 'long_short_bound', 'indep_sample_in_range'
        ]
        self.resize_type = resize_type

        if resize_type != 'indep_sample_in_range':
            assert scale_range is None
            assert isinstance(ratio_range, tuple)
            assert isinstance(aspect_ratio_range, tuple)
            assert check_argument.equal_len(ratio_range, aspect_ratio_range)

            if resize_type == 'long_short_bound':
                assert short_size_bound is not None
                assert long_size_bound is not None
        else:
            assert ratio_range is None
            assert aspect_ratio_range is None
            assert short_size_bound is None
            assert long_size_bound is None
            assert scale_range is not None

        self.aspect_ratio_range = aspect_ratio_range
        self.long_size_bound = long_size_bound
        self.short_size_bound = short_size_bound
        self.scale_range = scale_range

    @staticmethod
    def sample_from_range(range):
        """Draw one uniform sample between the two values of ``range``."""
        assert len(range) == 2
        min_value = min(range)
        max_value = max(range)
        return np.random.random_sample() * (max_value -
                                            min_value) + min_value

    def _random_scale(self, results):
        """Store the sampled ``(w, h)`` target in ``results['scale']``.

        ``results['scale_idx']`` is always set to ``None``.
        """
        if self.resize_type == 'indep_sample_in_range':
            # Width and height are sampled independently (width first).
            new_w = self.sample_from_range(self.scale_range)
            new_h = self.sample_from_range(self.scale_range)
            results['scale'] = (int(new_w), int(new_h))  # (w,h)
            results['scale_idx'] = None
            return

        h, w = results['img'].shape[0:2]
        long_side, short_side = max(h, w), min(h, w)

        if self.resize_type == 'long_short_bound':
            if long_side > self.long_size_bound:
                bound_scale = self.long_size_bound / long_side
            else:
                bound_scale = 1
            jitter = self.sample_from_range(self.ratio_range)
            scale = bound_scale * jitter
            # Never let the short side drop to the lower bound or below.
            if short_side * scale <= self.short_size_bound:
                scale = (self.short_size_bound + 10) * 1.0 / short_side
        elif self.resize_type == 'around_min_img_scale':
            base_short = min(self.img_scale[0])
            ratio = self.sample_from_range(self.ratio_range)
            scale = (ratio * base_short) / short_side
        else:
            raise NotImplementedError

        aspect = self.sample_from_range(self.aspect_ratio_range)
        root_aspect = math.sqrt(aspect)
        h_scale = scale * root_aspect
        w_scale = scale / root_aspect
        results['scale'] = (int(w * w_scale), int(h * h_scale))  # (w,h)
        results['scale_idx'] = None

View File

@ -1,152 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import defaultdict
import numpy as np
from mmcv.utils import print_log
from mmdet.datasets import ConcatDataset
from mmocr.registry import DATASETS
from mmocr.utils import is_2dlist, is_type_list
@DATASETS.register_module()
class UniformConcatDataset(ConcatDataset):
    """A wrapper of ConcatDataset which support dataset pipeline assignment and
    replacement.

    Args:
        datasets (list[dict] | list[list[dict]]): A list of datasets cfgs.
        separate_eval (bool): Whether to evaluate the results
            separately if it is used as validation dataset.
            Defaults to True.
        show_mean_scores (str | bool): Whether to compute the mean evaluation
            results, only applicable when ``separate_eval=True``. Options are
            [True, False, ``auto``]. If ``True``, mean results will be added to
            the result dictionary with keys in the form of
            ``mean_{metric_name}``. If 'auto', mean results will be shown only
            when more than 1 dataset is wrapped.
        pipeline (None | list[dict] | list[list[dict]]): If ``None``,
            each dataset in datasets use its own pipeline;
            If ``list[dict]``, it will be assigned to the dataset whose
            pipeline is None in datasets;
            If ``list[list[dict]]``, pipeline of dataset which is None
            in datasets will be replaced by the corresponding pipeline
            in the list.
        force_apply (bool): If True, apply pipeline above to each dataset
            even if it have its own pipeline. Default: False.
    """

    def __init__(self,
                 datasets,
                 separate_eval=True,
                 show_mean_scores='auto',
                 pipeline=None,
                 force_apply=False,
                 **kwargs):
        new_datasets = []
        if pipeline is not None:
            assert isinstance(
                pipeline,
                list), 'pipeline must be list[dict] or list[list[dict]].'
            if is_type_list(pipeline, dict):
                self._apply_pipeline(datasets, pipeline, force_apply)
                new_datasets = datasets
            elif is_2dlist(pipeline):
                # One pipeline per group of dataset configs.
                assert is_2dlist(datasets)
                assert len(datasets) == len(pipeline)
                for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                    self._apply_pipeline(sub_datasets, tmp_pipeline,
                                         force_apply)
                    new_datasets.extend(sub_datasets)
        elif is_2dlist(datasets):
            # No shared pipeline: just flatten the grouped configs.
            for sub_datasets in datasets:
                new_datasets.extend(sub_datasets)
        else:
            new_datasets = datasets

        datasets = [DATASETS.build(c, kwargs) for c in new_datasets]
        super().__init__(datasets, separate_eval)

        if not separate_eval:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')

        assert isinstance(show_mean_scores, bool) or show_mean_scores == 'auto'
        if show_mean_scores == 'auto':
            show_mean_scores = len(self.datasets) > 1
        self.show_mean_scores = show_mean_scores
        # 'auto' has already been resolved to a bool at this point.
        if show_mean_scores:
            if len({type(ds) for ds in self.datasets}) != 1:
                raise NotImplementedError(
                    'To compute mean evaluation scores, all datasets '
                    'must have the same type')

    @staticmethod
    def _apply_pipeline(datasets, pipeline, force_apply=False):
        """Assign a deep copy of ``pipeline`` to dataset configs in place.

        Args:
            datasets (list[dict]): Dataset configs to update.
            pipeline (list[dict]): The pipeline to assign.
            force_apply (bool): If True, also overwrite pipelines that are
                already set. Default: False.
        """
        from_cfg = all(isinstance(x, dict) for x in datasets)
        assert from_cfg, 'datasets should be config dicts'
        assert all(isinstance(x, dict) for x in pipeline)
        for dataset in datasets:
            # A config without a 'pipeline' key is treated like one whose
            # pipeline is None instead of raising KeyError.
            if force_apply or dataset.get('pipeline') is None:
                dataset['pipeline'] = copy.deepcopy(pipeline)

    def evaluate(self, results, logger=None, **kwargs):
        """Evaluate the results.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.

        Returns:
            dict[str: float]: Results of each separate dataset, keyed as
            ``{dataset_index}_{metric}``, plus ``mean_{metric}`` entries
            when ``self.show_mean_scores`` is set.

        Raises:
            NotImplementedError: If ``self.separate_eval`` is False.
        """
        assert len(results) == self.cumulative_sizes[-1], \
            ('Dataset and results have different sizes: '
             f'{self.cumulative_sizes[-1]} v.s. {len(results)}')

        # Check whether all the datasets support evaluation
        for dataset in self.datasets:
            assert hasattr(dataset, 'evaluate'), \
                f'{type(dataset)} does not implement evaluate function'

        if not self.separate_eval:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')

        total_eval_results = dict()
        mean_eval_results = defaultdict(list)
        start_idx = 0
        # cumulative_sizes[i] is the end offset of dataset i in ``results``.
        for dataset_idx, (dataset, end_idx) in enumerate(
                zip(self.datasets, self.cumulative_sizes)):
            results_per_dataset = results[start_idx:end_idx]
            print_log(
                f'\nEvaluating {dataset.ann_file} with '
                f'{len(results_per_dataset)} images now',
                logger=logger)

            eval_results_per_dataset = dataset.evaluate(
                results_per_dataset, logger=logger, **kwargs)
            for k, v in eval_results_per_dataset.items():
                total_eval_results[f'{dataset_idx}_{k}'] = v
                if self.show_mean_scores:
                    mean_eval_results[k].append(v)
            start_idx = end_idx

        if self.show_mean_scores:
            for k, v in mean_eval_results.items():
                total_eval_results[f'mean_{k}'] = np.mean(v)

        return total_eval_results

View File

@ -1,75 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import numpy as np
import pytest
from mmocr.datasets.base_dataset import BaseDataset
def _create_dummy_ann_file(ann_file):
    """Write two ``<file name> <text>`` annotation lines to ``ann_file``."""
    lines = ('sample1.jpg hello', 'sample2.jpg world')
    with open(ann_file, 'w') as fw:
        fw.writelines(f'{line}\n' for line in lines)
def _create_dummy_loader():
    """Return a ``HardDiskLoader`` config that uses a ``LineStrParser``."""
    parser = dict(type='LineStrParser', keys=['file_name', 'text'])
    return dict(type='HardDiskLoader', repeat=1, parser=parser)
def test_custom_dataset():
    """Check the basic ``BaseDataset`` API with ``test_mode`` on and off."""
    import copy  # local import: only needed for the mutation check below

    # The context manager removes the temporary directory even when an
    # assertion below fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        _create_dummy_ann_file(ann_file)
        loader = _create_dummy_loader()

        for mode in [True, False]:
            dataset = BaseDataset(
                ann_file, loader, pipeline=[], test_mode=mode)

            # test len
            assert len(dataset) == len(dataset.data_infos)

            # test set group flag
            assert np.allclose(dataset.flag, [0, 0])

            # test prepare_train_img
            expect_results = {
                'img_info': {
                    'file_name': 'sample1.jpg',
                    'text': 'hello'
                },
                'img_prefix': ''
            }
            assert dataset.prepare_train_img(0) == expect_results

            # test prepare_test_img
            assert dataset.prepare_test_img(0) == expect_results

            # test __getitem__
            assert dataset[0] == expect_results

            # test get_next_index
            assert dataset._get_next_index(0) == 1

            # test format_results: the input must be left untouched. A deep
            # copy is needed so that changes to the nested 'img_info' dict
            # would be detected as well.
            expect_results_copy = copy.deepcopy(expect_results)
            dataset.format_results(expect_results)
            assert expect_results_copy == expect_results

            # test evaluate
            with pytest.raises(NotImplementedError):
                dataset.evaluate(expect_results)

View File

@ -1,18 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import mmocr.datasets.pipelines.dbnet_transforms as transforms
def test_eastrandomcrop():
    """Check that the first cropped box matches its polygon's corners.

    After cropping, the first bbox must equal points 0 and 2 of the first
    polygon mask.
    """
    crop = transforms.EastRandomCrop(target_size=(60, 60), max_tries=100)

    results = {
        'img': np.random.rand(3, 100, 200),
        'gt_masks': np.array([[[0, 0, 50, 0, 50, 50, 0, 50]],
                              [[20, 20, 50, 20, 50, 50, 20, 50]]]),
        'bboxes': np.array([[0, 0, 50, 50], [20, 20, 50, 50]]),
        'mask_fields': ['gt_masks'],
        'bbox_fields': ['bboxes'],
    }

    results = crop(results)

    first_polygon = results['gt_masks'].masks[0][0]
    expected_box = first_polygon[[0, 2]].flatten()
    assert np.allclose(results['bboxes'][0], expected_box)

View File

@ -1,84 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
import tempfile
import numpy as np
from mmocr.datasets.text_det_dataset import TextDetDataset
def _create_dummy_ann_file(ann_file):
    """Write one JSON annotation line for a 640x640 image to ``ann_file``.

    The image carries two annotations, the second one flagged ``iscrowd``.
    """
    annotations = [
        dict(
            iscrowd=0,
            category_id=1,
            bbox=[50, 70, 80, 100],
            segmentation=[[50, 70, 80, 70, 80, 100, 50, 100]]),
        dict(
            iscrowd=1,
            category_id=1,
            bbox=[120, 140, 200, 200],
            segmentation=[[120, 140, 200, 140, 200, 200, 120, 200]]),
    ]
    ann_info = dict(
        file_name='sample1.jpg',
        height=640,
        width=640,
        annotations=annotations)

    with open(ann_file, 'w') as fw:
        fw.write(f'{json.dumps(ann_info)}\n')
def _create_dummy_loader():
    """Return a ``HardDiskLoader`` config that uses a ``LineJsonParser``."""
    parser = dict(
        type='LineJsonParser',
        keys=['file_name', 'height', 'width', 'annotations'])
    return dict(type='HardDiskLoader', repeat=1, parser=parser)
def test_detect_dataset():
    """Check ``TextDetDataset`` parsing, sample preparation and evaluation."""
    # The context manager removes the temporary directory even when an
    # assertion inside it fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        _create_dummy_ann_file(ann_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = TextDetDataset(ann_file, loader, pipeline=[])

        # test _parse_ann_info
        img_ann_info = dataset.data_infos[0]
        ann = dataset._parse_anno_info(img_ann_info['annotations'])
        assert np.allclose(ann['bboxes'], [[50., 70., 80., 100.]])
        assert np.allclose(ann['labels'], [1])
        assert np.allclose(ann['bboxes_ignore'], [[120, 140, 200, 200]])
        assert np.allclose(ann['masks'],
                           [[[50, 70, 80, 70, 80, 100, 50, 100]]])
        assert np.allclose(ann['masks_ignore'],
                           [[[120, 140, 200, 140, 200, 200, 120, 200]]])

    # The annotation file is gone from here on, exactly as in the original
    # test, which cleaned up before the checks below.

    # test prepare_train_img
    pipeline_results = dataset.prepare_train_img(0)
    assert np.allclose(pipeline_results['bbox_fields'], [])
    assert np.allclose(pipeline_results['mask_fields'], [])
    assert np.allclose(pipeline_results['seg_fields'], [])
    expect_img_info = {'filename': 'sample1.jpg', 'height': 640, 'width': 640}
    assert pipeline_results['img_info'] == expect_img_info

    # test evaluation
    metrics = 'hmean-iou'
    results = [{'boundary_result': [[50, 70, 80, 70, 80, 100, 50, 100, 1]]}]
    eval_res = dataset.evaluate(results, metrics)

    assert eval_res['hmean-iou:hmean'] == 1

View File

@ -1,171 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import mmcv
import numpy as np
from mmocr.datasets.icdar_dataset import IcdarDataset
def _create_dummy_icdar_json(json_name):
    """Dump a small COCO-style annotation dict to ``json_name``.

    The dict holds two 640x640 images, five annotations (four on image 0,
    one on image 1) and a single ``text`` category.
    """

    def _image(image_id, file_name):
        return {
            'id': image_id,
            'width': 640,
            'height': 640,
            'file_name': file_name,
        }

    def _annotation(ann_id, image_id, area, bbox, iscrowd, polygon):
        return {
            'id': ann_id,
            'image_id': image_id,
            'category_id': 0,
            'area': area,
            'bbox': bbox,
            'iscrowd': iscrowd,
            'segmentation': [polygon]
        }

    images = [_image(0, 'fake_name.jpg'), _image(1, 'fake_name1.jpg')]
    annotations = [
        _annotation(1, 0, 400, [50, 60, 20, 20], 0,
                    [50, 60, 70, 60, 70, 80, 50, 80]),
        _annotation(2, 0, 900, [100, 120, 30, 30], 0,
                    [100, 120, 130, 120, 120, 150, 100, 150]),
        _annotation(3, 0, 1600, [150, 160, 40, 40], 1,
                    [150, 160, 190, 160, 190, 200, 150, 200]),
        _annotation(4, 0, 10000, [250, 260, 100, 100], 1,
                    [250, 260, 350, 260, 350, 360, 250, 360]),
        _annotation(5, 1, 10000, [250, 260, 100, 100], 1,
                    [250, 260, 350, 260, 350, 360, 250, 360]),
    ]
    categories = [{
        'id': 0,
        'name': 'text',
        'supercategory': 'text',
    }]

    fake_json = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }
    mmcv.dump(fake_json, json_name)
def test_icdar_dataset():
    """Check ``IcdarDataset`` loading, annotation parsing and evaluation."""
    # The context manager removes the temporary directory even when an
    # assertion inside it fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        fake_json_file = osp.join(tmp_dir, 'fake_data.json')
        _create_dummy_icdar_json(fake_json_file)

        # test initialization
        dataset = IcdarDataset(ann_file=fake_json_file, pipeline=[])
        # NB: ('text') is a plain str, not a one-element tuple.
        assert dataset.CLASSES == ('text')
        assert dataset.img_ids == [0, 1]
        assert dataset.select_first_k == -1

        # test _parse_ann_info
        ann = dataset.get_ann_info(0)
        assert np.allclose(ann['bboxes'],
                           [[50., 60., 70., 80.], [100., 120., 130., 150.]])
        assert np.allclose(ann['labels'], [0, 0])
        assert np.allclose(
            ann['bboxes_ignore'],
            [[150., 160., 190., 200.], [250., 260., 350., 360.]])
        assert np.allclose(ann['masks'],
                           [[[50, 60, 70, 60, 70, 80, 50, 80]],
                            [[100, 120, 130, 120, 120, 150, 100, 150]]])
        assert np.allclose(ann['masks_ignore'],
                           [[[150, 160, 190, 160, 190, 200, 150, 200]],
                            [[250, 260, 350, 260, 350, 360, 250, 360]]])
        assert dataset.cat_ids == [0]

    # The JSON file is gone from here on, exactly as in the original test,
    # which cleaned up before the checks below.

    # test rank output
    # result = [[]]
    # out_file = tempfile.NamedTemporaryFile().name

    # with pytest.raises(AssertionError):
    #     dataset.output_ranklist(result, out_file)
    # result = [{'hmean': 1}, {'hmean': 0.5}]

    # output = dataset.output_ranklist(result, out_file)
    # assert output[0]['hmean'] == 0.5

    # test get_gt_mask
    # output = dataset.get_gt_mask()
    # assert np.allclose(output[0][0],
    #                    [[50, 60, 70, 60, 70, 80, 50, 80],
    #                     [100, 120, 130, 120, 120, 150, 100, 150]])
    # assert output[0][1] == []
    # assert np.allclose(output[1][0],
    #                    [[150, 160, 190, 160, 190, 200, 150, 200],
    #                     [250, 260, 350, 260, 350, 360, 250, 360]])
    # assert np.allclose(output[1][1],
    #                    [[250, 260, 350, 260, 350, 360, 250, 360]])

    # test evaluation
    metrics = ['hmean-iou', 'hmean-ic13']
    results = [{
        'boundary_result': [[50, 60, 70, 60, 70, 80, 50, 80, 1],
                            [100, 120, 130, 120, 120, 150, 100, 150, 1]]
    }, {
        'boundary_result': []
    }]
    output = dataset.evaluate(results, metrics)
    assert output['hmean-iou:hmean'] == 1
    assert output['hmean-ic13:hmean'] == 1

    # Same boundaries, but the first one now only has a score of 0.5.
    results = [{
        'boundary_result': [[50, 60, 70, 60, 70, 80, 50, 80, 0.5],
                            [100, 120, 130, 120, 120, 150, 100, 150, 1]]
    }, {
        'boundary_result': []
    }]
    output = dataset.evaluate(
        results, metrics, min_score_thr=0, max_score_thr=1, step=0.5)
    assert output['hmean-iou:hmean'] == 1
    assert output['hmean-ic13:hmean'] == 1

    output = dataset.evaluate(
        results, metrics, min_score_thr=0.6, max_score_thr=1, step=0.5)
    assert output['hmean-iou:hmean'] == 1 / 1.5
    assert output['hmean-ic13:hmean'] == 1 / 1.5

View File

@ -1,128 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import math
import os.path as osp
import tempfile
import pytest
import torch
from mmocr.datasets.kie_dataset import KIEDataset
def _create_dummy_ann_file(ann_file):
    """Write one JSON KIE annotation line to ``ann_file`` and return it.

    Returns:
        dict: The annotation that was written (a 200x200 image with five
        labelled text boxes).
    """
    texts_and_boxes = (
        ('store', [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]),
        ('address', [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0]),
        ('price', [33.0, 2.0, 43.0, 2.0, 36.0, 12.0, 25.0, 12.0]),
        ('1.0', [46.0, 2.0, 61.0, 2.0, 53.0, 12.0, 39.0, 12.0]),
        ('google', [61.0, 2.0, 69.0, 2.0, 63.0, 12.0, 55.0, 12.0]),
    )
    ann_info = {
        'file_name': 'sample1.png',
        'height': 200,
        'width': 200,
        'annotations': [{
            'text': text,
            'box': box,
            'label': 1
        } for text, box in texts_and_boxes]
    }

    with open(ann_file, 'w') as fw:
        fw.write(json.dumps(ann_info) + '\n')

    return ann_info
def _create_dummy_dict_file(dict_file):
    """Write the characters of ``'0123'`` one per line; return the string."""
    dict_str = '0123'
    with open(dict_file, 'w') as fw:
        fw.write(''.join(f'{char}\n' for char in dict_str))
    return dict_str
def _create_dummy_loader():
    """Return a ``HardDiskLoader`` config that uses a ``LineJsonParser``."""
    parser = dict(
        type='LineJsonParser',
        keys=['file_name', 'height', 'width', 'annotations'])
    return dict(type='HardDiskLoader', repeat=1, parser=parser)
def test_kie_dataset():
    """Check ``KIEDataset`` loading, annotation parsing and evaluation."""
    # The context manager removes the temporary directory even when the
    # dataset construction fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        ann_info1 = _create_dummy_ann_file(ann_file)

        dict_file = osp.join(tmp_dir, 'fake_dict.txt')
        _create_dummy_dict_file(dict_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = KIEDataset(ann_file, loader, dict_file, pipeline=[])

    # The files are gone from here on, exactly as in the original test,
    # which cleaned up right after building the dataset.
    dataset.prepare_train_img(0)

    # test pre_pipeline
    img_ann_info = dataset.data_infos[0]
    img_info = {
        'filename': img_ann_info['file_name'],
        'height': img_ann_info['height'],
        'width': img_ann_info['width']
    }
    ann_info = dataset._parse_anno_info(img_ann_info['annotations'])
    results = dict(img_info=img_info, ann_info=ann_info)
    dataset.pre_pipeline(results)
    assert results['img_prefix'] == dataset.img_prefix

    # test _parse_anno_info
    annos = ann_info1['annotations']
    # A single annotation dict instead of a list is rejected.
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(annos[0])
    # An annotation without 'label' is accepted ...
    tmp_annos = [{
        'text': 'store',
        'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]
    }]
    dataset._parse_anno_info(tmp_annos)
    # ... but one without 'box' is not.
    tmp_annos = [{'text': 'store'}]
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(tmp_annos)

    return_anno = dataset._parse_anno_info(annos)
    assert 'bboxes' in return_anno
    assert 'relations' in return_anno
    assert 'texts' in return_anno
    assert 'labels' in return_anno

    # test evaluation
    result = {}
    result['nodes'] = torch.full((5, 5), 1, dtype=torch.float)
    result['nodes'][:, 1] = 100.
    results = [result for _ in range(5)]

    eval_res = dataset.evaluate(results)
    assert math.isclose(eval_res['macro_f1'], 0.2, abs_tol=1e-4)


# Only run directly when executed as a script; an unguarded call would run
# the test a second time whenever the module is imported or collected.
if __name__ == '__main__':
    test_kie_dataset()

View File

@ -1,96 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
import tempfile
import pytest
from mmocr.datasets.utils.backend import (HardDiskAnnFileBackend,
HTTPAnnFileBackend,
PetrelAnnFileBackend)
from mmocr.datasets.utils.loader import AnnFileLoader
from mmocr.utils import recog2lmdb
def _create_dummy_line_str_file(ann_file):
    """Write two ``<file name> <text>`` annotation lines to ``ann_file``."""
    lines = ('sample1.jpg hello', 'sample2.jpg world')
    with open(ann_file, 'w') as fw:
        fw.writelines(f'{line}\n' for line in lines)
def _create_dummy_line_json_file(ann_file):
    """Write two JSON ``{filename, text}`` annotation lines to ``ann_file``."""
    records = (
        {'filename': 'sample1.jpg', 'text': 'hello'},
        {'filename': 'sample2.jpg', 'text': 'world'},
    )
    with open(ann_file, 'w') as fw:
        fw.writelines(json.dumps(record) + '\n' for record in records)
def test_loader():
    """Check ``AnnFileLoader`` with txt/lmdb files and both line parsers."""
    # The context manager removes the temporary directory even when an
    # assertion below fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        _create_dummy_line_str_file(ann_file)

        parser = dict(
            type='LineStrParser',
            keys=['filename', 'text'],
            keys_idx=[0, 1],
            separator=' ')

        # invalid ``repeat`` and invalid parser config are rejected
        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, parser, repeat=0)
        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, [], repeat=1)

        # test text loader and line str parser
        text_loader = AnnFileLoader(
            ann_file, parser, repeat=1, file_format='txt')
        assert len(text_loader) == 2
        assert text_loader.ori_data_infos[0] == 'sample1.jpg hello'
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test text loader and linedict parser
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = AnnFileLoader(
            ann_file, json_parser, repeat=1, file_format='txt')
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test iterating a text loader past its end
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = AnnFileLoader(
            ann_file, json_parser, repeat=1, file_format='txt')
        it = iter(text_loader)
        with pytest.raises(StopIteration):
            for _ in range(len(text_loader) + 1):
                next(it)

        # test lmdb loader and line json parser
        _create_dummy_line_str_file(ann_file)
        lmdb_file = osp.join(tmp_dir, 'fake_data.lmdb')
        recog2lmdb(
            img_root=None,
            label_path=ann_file,
            label_only=True,
            output=lmdb_file,
            lmdb_map_size=102400)

        parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        lmdb_loader = AnnFileLoader(
            lmdb_file, parser, repeat=1, file_format='lmdb')
        try:
            assert lmdb_loader[0] == {
                'filename': 'sample1.jpg',
                'text': 'hello'
            }
        finally:
            # Close the loader even on failure so the directory holding
            # the LMDB file can still be removed.
            lmdb_loader.close()

        # backends reject the 'json' file format
        with pytest.raises(AssertionError):
            HardDiskAnnFileBackend(file_format='json')
        with pytest.raises(AssertionError):
            PetrelAnnFileBackend(file_format='json')
        with pytest.raises(AssertionError):
            HTTPAnnFileBackend(file_format='json')

View File

@ -1,86 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import numpy as np
from mmocr.datasets.pipelines import LoadImageFromNdarray, LoadTextAnnotations
def _create_dummy_ann():
    """Return a fresh results dict with four ignored polygon masks.

    The image info is 1000x1000, ``masks`` and ``mask_fields`` are empty.
    """
    masks_ignore = [
        [[499, 94, 531, 94, 531, 124, 499, 124]],
        [[3, 156, 81, 155, 78, 181, 0, 182]],
        [[11, 223, 59, 221, 59, 234, 11, 236]],
        [[500, 156, 551, 156, 550, 165, 499, 165]]
    ]
    return {
        'img_info': {
            'height': 1000,
            'width': 1000
        },
        'ann_info': {
            'masks': [],
            'masks_ignore': masks_ignore
        },
        'mask_fields': []
    }
def test_loadtextannotation():
    """Check which image size ``LoadTextAnnotations._load_masks`` uses."""
    results = _create_dummy_ann()
    # Positional flags: with_bbox, with_label, with_mask, with_seg.
    flags = (True, True, True, False)

    # If no 'ori_shape' in result but use_img_shape=True,
    # result['img_info']['height'] and result['img_info']['width']
    # will be used to generate mask.
    loader = LoadTextAnnotations(*flags, False, use_img_shape=True)
    output = loader._load_masks(copy.deepcopy(results))
    ignored = output['gt_masks_ignore']
    assert len(ignored) == 4
    assert np.allclose(ignored.masks[0],
                       [[499, 94, 531, 94, 531, 124, 499, 124]])
    assert ignored.height == results['img_info']['height']

    # If 'ori_shape' in result and use_img_shape=True,
    # result['ori_shape'] will be used to generate mask.
    loader = LoadTextAnnotations(*flags, poly2mask=True, use_img_shape=True)
    tmp_results = copy.deepcopy(results)
    tmp_results['ori_shape'] = (640, 640, 3)
    output = loader._load_masks(tmp_results)
    assert output['img_info']['height'] == 640
    assert output['gt_masks_ignore'].height == 640
def test_load_img_from_numpy():
    """Check the channel count produced by ``LoadImageFromNdarray``."""
    # 3-channel input loaded as color stays a 3-channel, 3-d image
    three_channel = {'img': np.ones((32, 100, 3), dtype=np.uint8)}
    output = LoadImageFromNdarray(color_type='color')(three_channel)
    out_shape = output['img'].shape
    assert out_shape[2] == 3
    assert len(out_shape) == 3

    # 1-channel input loaded as color comes out with 3 channels
    one_channel = {'img': np.ones((32, 100, 1), dtype=np.uint8)}
    output = LoadImageFromNdarray(color_type='color')(one_channel)
    assert output['img'].shape[2] == 3

    # 3-channel input loaded as grayscale comes out with 1 channel
    three_channel = {'img': np.ones((32, 100, 3), dtype=np.uint8)}
    to_gray = LoadImageFromNdarray(color_type='grayscale', to_float32=True)
    output = to_gray(three_channel)
    assert output['img'].shape[2] == 1

View File

@ -1,114 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
import tempfile
import torch
from mmocr.datasets.ner_dataset import NerDataset
from mmocr.models.ner.convertors.ner_convertor import NerConvertor
from mmocr.utils import list_to_file
def _create_dummy_ann_file(ann_file):
    """Write one JSON NER sample (text plus entity spans) to ``ann_file``."""
    label = {
        'address': {
            '台湾': [[15, 16]]
        },
        'name': {
            '彭小军': [[0, 2]]
        }
    }
    sample = {'text': '彭小军认为,国内银行现在走的是台湾的发卡模式', 'label': label}
    # ensure_ascii=False keeps the Chinese characters unescaped.
    serialized = json.dumps(sample, ensure_ascii=False)
    list_to_file(ann_file, [serialized])
def _create_dummy_vocab_file(vocab_file):
    """Pass the JSON-encoded letters 'a'..'z' to ``list_to_file`` in turn."""
    # NOTE(review): list_to_file is called once per letter with the same
    # path; if it truncates the file on each call only the last entry
    # survives -- confirm against list_to_file.
    for code in range(ord('a'), ord('z') + 1):
        entry = json.dumps(chr(code) + '\n', ensure_ascii=False)
        list_to_file(vocab_file, [entry])
def _create_dummy_loader():
    """Return a ``HardDiskLoader`` config that uses a ``LineJsonParser``."""
    parser = dict(type='LineJsonParser', keys=['text', 'label'])
    return dict(type='HardDiskLoader', repeat=1, parser=parser)
def test_ner_dataset():
    """Check ``NerDataset`` and ``NerConvertor.convert_pred2entities``."""
    # test initialization
    loader = _create_dummy_loader()
    categories = [
        'address', 'book', 'company', 'game', 'government', 'movie', 'name',
        'organization', 'position', 'scene'
    ]
    max_len = 128

    # The context manager removes the temporary directory even when an
    # assertion below fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        vocab_file = osp.join(tmp_dir, 'fake_vocab.txt')
        _create_dummy_ann_file(ann_file)
        _create_dummy_vocab_file(vocab_file)

        ner_convertor = dict(
            type='NerConvertor',
            annotation_type='bio',
            vocab_file=vocab_file,
            categories=categories,
            max_len=max_len)

        test_pipeline = [
            dict(
                type='NerTransform',
                label_convertor=ner_convertor,
                max_len=max_len),
            dict(type='ToTensorNER')
        ]
        dataset = NerDataset(ann_file, loader, pipeline=test_pipeline)

        # test pre_pipeline
        img_info = dataset.data_infos[0]
        results = dict(img_info=img_info)
        dataset.pre_pipeline(results)

        # test prepare_train_img
        dataset.prepare_train_img(0)

        # test evaluation
        result = [[['address', 15, 16], ['name', 0, 2]]]
        dataset.evaluate(result)

        # test pred convert2entity function
        pred = [
            21, 7, 17, 17, 21, 21, 21, 21, 21, 21, 13, 21, 21, 21, 21, 21, 1,
            11, 21, 21, 7, 17, 17, 21, 21, 21, 21, 21, 21, 13, 21, 21, 21, 21,
            21, 1, 11, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 1, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 1, 21, 21, 21, 21, 21, 21
        ]
        preds = [pred[:max_len]]

        # Only the first ten positions are marked in the mask.
        mask = [1] * 10 + [0] * (max_len - 10)
        assert len(preds[0]) == len(mask)
        masks = torch.tensor([mask])

        convertor = NerConvertor(
            annotation_type='bio',
            vocab_file=vocab_file,
            categories=categories,
            max_len=max_len)
        all_entities = convertor.convert_pred2entities(
            preds=preds, masks=masks)
        assert len(all_entities[0][0]) == 3

View File

@ -1,75 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import math
import os.path as osp
import tempfile
from mmocr.datasets.ocr_dataset import OCRDataset
def _create_dummy_ann_file(ann_file):
    """Write a two-line dummy annotation file.

    Every line is written as ``<image name> <text>`` followed by a newline.

    Args:
        ann_file (str): Path of the annotation file to create or overwrite.
    """
    samples = ('sample1.jpg hello', 'sample2.jpg world')
    with open(ann_file, 'w') as writer:
        writer.writelines(sample + '\n' for sample in samples)
def _create_dummy_loader():
    """Build the loader config used by the OCR dataset test.

    Returns:
        dict: A ``HardDiskLoader`` config with ``repeat=1`` whose parser is a
        ``LineStrParser`` config with the keys ``file_name`` and ``text``.
    """
    parser_cfg = dict(type='LineStrParser', keys=['file_name', 'text'])
    return dict(type='HardDiskLoader', repeat=1, parser=parser_cfg)
def test_detect_dataset():
    """Test initialization, ``pre_pipeline`` and evaluation of ``OCRDataset``.

    The dataset is built from a dummy annotation file written into a
    temporary directory. The directory is removed once the dataset has been
    constructed; the remaining checks only use the in-memory dataset.
    """
    # The context manager removes the temporary directory even when writing
    # the annotation file or building the dataset raises.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        _create_dummy_ann_file(ann_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = OCRDataset(ann_file, loader, pipeline=[])

    # test pre_pipeline
    img_info = dataset.data_infos[0]
    results = dict(img_info=img_info)
    dataset.pre_pipeline(results)
    assert results['img_prefix'] == dataset.img_prefix
    assert results['text'] == img_info['text']

    # test evaluation with the single metric name 'acc'
    metric = 'acc'
    results = [{'text': 'hello'}, {'text': 'worl'}]
    eval_res = dataset.evaluate(results, metric)
    assert math.isclose(eval_res['word_acc'], 0.5, abs_tol=1e-4)
    assert math.isclose(eval_res['char_precision'], 1.0, abs_tol=1e-4)
    assert math.isclose(eval_res['char_recall'], 0.9, abs_tol=1e-4)

    # a single requested metric yields a single entry
    eval_res = dataset.evaluate(results, metric='word_acc')
    assert math.isclose(eval_res['word_acc'], 0.5, abs_tol=1e-4)
    assert len(eval_res) == 1

    # a list of requested metrics yields one entry per metric
    eval_res = dataset.evaluate(
        results, metric=['char_precision', 'char_recall'])
    assert math.isclose(eval_res['char_precision'], 1.0, abs_tol=1e-4)
    assert math.isclose(eval_res['char_recall'], 0.9, abs_tol=1e-4)
    assert len(eval_res) == 2

    # predictions that differ in case and carry an extra symbol
    results = [{'text': 'HELLO*'}, {'text': 'worl'}]
    eval_res = dataset.evaluate(
        results,
        metric=[
            'word_acc_ignore_case_symbol', 'word_acc_ignore_case',
            'one_minus_ned'
        ])
    assert math.isclose(
        eval_res['word_acc_ignore_case_symbol'], 0.5, abs_tol=1e-4)
    assert math.isclose(eval_res['word_acc_ignore_case'], 0, abs_tol=1e-4)
    # 'one_minus_ned' is reported under the key '1-N.E.D'
    assert math.isclose(eval_res['1-N.E.D'], 0.9, abs_tol=1e-4)
    assert len(eval_res) == 3

View File

@ -1,98 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import math
import os.path as osp
import tempfile
import torch
from mmocr.datasets.openset_kie_dataset import OpensetKIEDataset
from mmocr.utils import list_to_file
def _create_dummy_ann_file(ann_file):
    """Write a one-image dummy KIE annotation file and return its content.

    The annotation is serialized with ``json.dumps`` and written through
    ``list_to_file`` as a single line.

    Args:
        ann_file (str): Path of the annotation file to write.

    Returns:
        dict: The annotation that was written, holding ``file_name``,
        ``height``, ``width`` and a two-element ``annotations`` list.
    """
    annotations = [
        dict(
            text='store',
            box=[11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0],
            label=1,
            edge=1),
        dict(
            text='MyFamily',
            box=[23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0],
            label=2,
            edge=1),
    ]
    ann_info = dict(
        file_name='1.png', height=200, width=200, annotations=annotations)
    serialized = json.dumps(ann_info)
    list_to_file(ann_file, [serialized])
    return ann_info
def _create_dummy_dict_file(dict_file):
    """Write the characters ``0`` to ``3`` to ``dict_file`` via ``list_to_file``.

    Args:
        dict_file (str): Path of the dictionary file to write.
    """
    characters = ['0', '1', '2', '3']
    list_to_file(dict_file, characters)
def _create_dummy_loader():
    """Build the loader config used by the openset KIE dataset test.

    Returns:
        dict: A ``HardDiskLoader`` config with ``repeat=1`` whose parser is a
        ``LineJsonParser`` config with the keys ``file_name``, ``height``,
        ``width`` and ``annotations``.
    """
    parser_cfg = dict(
        type='LineJsonParser',
        keys=['file_name', 'height', 'width', 'annotations'])
    return dict(type='HardDiskLoader', repeat=1, parser=parser_cfg)
def test_openset_kie_dataset():
    """Test ``OpensetKIEDataset`` initialization, pre_pipeline and evaluate.

    All steps run while the temporary directory holding the dummy annotation
    and dictionary files still exists.
    """
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        ann_info1 = _create_dummy_ann_file(ann_file)
        dict_file = osp.join(tmp_dir_name, 'fake_dict.txt')
        _create_dummy_dict_file(dict_file)

        # test initialization
        dataset = OpensetKIEDataset(
            ann_file, _create_dummy_loader(), dict_file, pipeline=[])
        dataset.prepare_train_img(0)

        # test pre_pipeline
        raw_info = dataset.data_infos[0]
        img_info = dict(
            filename=raw_info['file_name'],
            height=raw_info['height'],
            width=raw_info['width'])
        ann_info = dataset._parse_anno_info(raw_info['annotations'])
        results = dict(img_info=img_info, ann_info=ann_info)
        dataset.pre_pipeline(results)
        assert results['img_prefix'] == dataset.img_prefix
        assert 'ori_texts' in results

        # test evaluation: the image meta carries the texts and boxes of the
        # dummy annotation
        annos = ann_info1['annotations']
        img_meta = dict(
            filename=ann_info1['file_name'],
            ori_filename=ann_info1['file_name'],
            ori_texts=[anno['text'] for anno in annos],
            ori_bboxes=[anno['box'] for anno in annos])
        result = dict(
            img_metas=[img_meta],
            nodes=torch.tensor([[0.01, 0.8, 0.01, 0.18],
                                [0.01, 0.01, 0.9, 0.08]]),
            edges=torch.Tensor([[0.01, 0.99]] * 4))

        eval_res = dataset.evaluate([result])
        assert math.isclose(eval_res['edge_openset_f1'], 1.0, abs_tol=1e-4)

View File

@ -1,64 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import pytest
from mmocr.datasets.utils.parser import LineJsonParser, LineStrParser
def test_line_str_parser():
    """Test argument validation and ``get_item`` of ``LineStrParser``."""
    data_ret = ['sample1.jpg hello\n', 'sample2.jpg world']
    keys = ['filename', 'text']
    keys_idx = [0, 1]
    separator = ' '

    # test init: each call passes one invalid argument
    with pytest.raises(AssertionError):
        LineStrParser('filename', keys_idx, separator)
    with pytest.raises(AssertionError):
        LineStrParser(keys, keys_idx, [' '])
    with pytest.raises(AssertionError):
        LineStrParser(keys, [0], separator)

    # test get_item
    parser = LineStrParser(keys, keys_idx, separator)
    assert parser.get_item(data_ret, 0) == {
        'filename': 'sample1.jpg',
        'text': 'hello'
    }

    # Requesting a third field from a two-field line must fail in get_item.
    # The parser is built outside the ``raises`` block so that a constructor
    # failure cannot satisfy the expectation by accident.
    parser = LineStrParser(['filename', 'text', 'ignore'], [0, 1, 2],
                           separator)
    with pytest.raises(Exception):
        parser.get_item(data_ret, 0)
def test_line_dict_parser():
    """Test argument validation and ``get_item`` of ``LineJsonParser``."""
    data_ret = [
        json.dumps({
            'filename': 'sample1.jpg',
            'text': 'hello'
        }),
        json.dumps({
            'filename': 'sample2.jpg',
            'text': 'world'
        })
    ]
    keys = ['filename', 'text']

    # test init: a non-list and an empty list are both rejected
    with pytest.raises(AssertionError):
        LineJsonParser('filename')
    with pytest.raises(AssertionError):
        LineJsonParser([])

    # test get_item
    parser = LineJsonParser(keys)
    assert parser.get_item(data_ret, 0) == {
        'filename': 'sample1.jpg',
        'text': 'hello'
    }

    # 'img_name' is absent from the JSON lines, so get_item must fail.
    # The parser is built outside the ``raises`` block so that a constructor
    # failure cannot satisfy the expectation by accident.
    parser = LineJsonParser(['img_name', 'text'])
    with pytest.raises(Exception):
        parser.get_item(data_ret, 0)

View File

@ -1,34 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
from mmocr.datasets.pipelines.test_time_aug import MultiRotateAugOCR
def test_resize_ocr():
    """Test ``MultiRotateAugOCR`` argument checks and output counts.

    A 64x256 input is expected to produce one output, while a 64x32 input is
    expected to produce three (one per configured rotation degree).
    """
    wide_img = np.ones((64, 256, 3), dtype=np.uint8)
    tall_img = np.ones((64, 32, 3), dtype=np.uint8)
    rotate_aug = MultiRotateAugOCR(transforms=[], rotate_degrees=[0, 90, 270])

    # test invalid arguments
    for bad_degree in (45, 20.5):
        with pytest.raises(AssertionError):
            MultiRotateAugOCR(transforms=[], rotate_degrees=[bad_degree])

    # test call with each input image
    cases = (
        (wide_img, [64, 256, 3], 1),
        (tall_img, [64, 32, 3], 3),
    )
    for image, expected_shape, num_outputs in cases:
        outputs = rotate_aug({'img_shape': image.shape, 'img': image})
        assert np.allclose(expected_shape, outputs['img_shape'])
        assert len(outputs['img']) == num_outputs
        assert len(outputs['img_shape']) == num_outputs
        assert np.allclose(expected_shape, outputs['img_shape'][0])

View File

@ -1,263 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import mock
import numpy as np
from mmdet.core import PolygonMasks
import mmocr.datasets.pipelines.custom_format_bundle as cf_bundle
import mmocr.datasets.pipelines.textdet_targets as textdet_targets
@mock.patch('%s.cf_bundle.show_feature' % __name__)
def test_gen_pannet_targets(mock_show_feature):
    """Test kernel, effective-mask and target generation of ``PANetTargets``.

    Args:
        mock_show_feature (mock.MagicMock): Patched
            ``cf_bundle.show_feature``; the test checks that it is called
            exactly once when visualization is enabled.
    """
    target_generator = textdet_targets.PANetTargets()
    assert target_generator.max_shrink == 20

    # test generate_kernels: with shrink_ratio 1.0 the two polygons are
    # expected to be labelled 1 and 2 in the kernel map
    img_size = (3, 10)
    text_polys = [[np.array([0, 0, 1, 0, 1, 1, 0, 1])],
                  [np.array([2, 0, 3, 0, 3, 1, 2, 1])]]
    shrink_ratio = 1.0
    kernel = np.array([[1, 1, 2, 2, 0, 0, 0, 0, 0, 0],
                       [1, 1, 2, 2, 0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    output, _ = target_generator.generate_kernels(img_size, text_polys,
                                                  shrink_ratio)
    assert np.allclose(output, kernel)

    # test generate_effective_mask: ignored polygons are zeroed out
    polys_ignore = text_polys
    output = target_generator.generate_effective_mask((3, 10), polys_ignore)
    target = np.array([[0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
    assert np.allclose(output, target)

    # test generate_targets
    results = {}
    results['img'] = np.zeros((3, 10, 3), np.uint8)
    results['gt_masks'] = PolygonMasks(text_polys, 3, 10)
    results['gt_masks_ignore'] = PolygonMasks([], 3, 10)
    results['img_shape'] = (3, 10, 3)
    results['mask_fields'] = []
    output = target_generator(results)
    assert len(output['gt_kernels']) == 2
    assert len(output['gt_mask']) == 1

    # format the targets with visualization enabled; show_feature is mocked
    bundle = cf_bundle.CustomFormatBundle(
        keys=['gt_kernels', 'gt_mask'],
        visualize=dict(flag=True, boundary_key='gt_kernels'))
    bundle(output)
    assert 'gt_kernels' in output.keys()
    assert 'gt_mask' in output.keys()
    mock_show_feature.assert_called_once()
def test_gen_psenet_targets():
    """Check the default ``max_shrink`` and ``shrink_ratio`` of PSENetTargets."""
    psenet_targets = textdet_targets.PSENetTargets()
    expected_ratios = (1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
    assert psenet_targets.max_shrink == 20
    assert psenet_targets.shrink_ratio == expected_ratios
# Test DBNetTargets
def test_dbnet_targets_find_invalid():
    """Check ``DBNetTargets`` defaults and ``find_invalid`` on two squares."""
    dbnet_targets = textdet_targets.DBNetTargets()
    assert dbnet_targets.shrink_ratio == 0.4
    assert dbnet_targets.thr_min == 0.3
    assert dbnet_targets.thr_max == 0.7

    # Two 10x10 squares on a 40x40 canvas; neither is expected to be flagged.
    squares = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
               [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    results = dict(gt_masks=PolygonMasks(squares, 40, 40))

    flags = dbnet_targets.find_invalid(results)
    assert np.allclose(flags, [False, False])
def test_dbnet_targets():
    """Check the default shrink ratio and threshold range of DBNetTargets."""
    dbnet_targets = textdet_targets.DBNetTargets()
    expected_defaults = (
        ('shrink_ratio', 0.4),
        ('thr_min', 0.3),
        ('thr_max', 0.7),
    )
    for attr_name, expected in expected_defaults:
        assert getattr(dbnet_targets, attr_name) == expected
def test_dbnet_ignore_texts():
    """Test that ``ignore_texts`` moves tagged instances to the ignore set.

    The first of two instances is tagged as ignored; afterwards one label and
    one mask remain, and the ignored polygon is appended to
    ``gt_masks_ignore``.
    """
    dbnet_targets = textdet_targets.DBNetTargets()
    polys = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
             [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    polys_ignore = [[np.array([0, 0, 15, 0, 15, 10, 0, 10])]]
    results = dict(
        gt_masks_ignore=PolygonMasks(polys_ignore, 40, 40),
        gt_masks=PolygonMasks(polys, 40, 40),
        gt_bboxes=np.array([[0, 0, 10, 10], [20, 0, 30, 10]]),
        gt_labels=np.array([0, 1]))

    dbnet_targets.ignore_texts(results, [True, False])

    assert np.allclose(results['gt_labels'], np.array([1]))
    ignored_masks = results['gt_masks_ignore'].masks
    assert len(ignored_masks) == 2
    assert np.allclose(ignored_masks[1][0], polys[0][0])
    assert len(results['gt_masks'].masks) == 1
def test_dbnet_generate_thr_map():
    """Check that every generated threshold value lies in [0.29, 0.71]."""
    dbnet_targets = textdet_targets.DBNetTargets()
    squares = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
               [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    # generate_thr_map returns the map and its mask; only the map is checked.
    thr_map, _ = dbnet_targets.generate_thr_map((40, 40), squares)
    above_lower = thr_map >= 0.29
    below_upper = thr_map <= 0.71
    assert np.all(above_lower * below_upper)
def test_dbnet_draw_border_map():
    """Smoke-test ``draw_border_map`` with a polygon partly off the canvas.

    No output is asserted; the test only requires the call not to raise.
    """
    canvas_size = (40, 40)
    border_map = np.zeros(canvas_size, dtype=np.float32)
    border_mask = np.zeros(canvas_size, dtype=np.uint8)
    # Several vertices have negative x coordinates.
    polygon = np.array([[20, 21], [-14, 20], [-11, 30], [-22, 26]])

    dbnet_targets = textdet_targets.DBNetTargets()
    dbnet_targets.draw_border_map(polygon, border_map, border_mask)
def test_dbnet_generate_targets():
    """Check the mask fields that ``DBNetTargets.generate_targets`` registers."""
    dbnet_targets = textdet_targets.DBNetTargets()
    polys = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
             [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    polys_ignore = [[np.array([0, 0, 15, 0, 15, 10, 0, 10])]]
    results = dict(
        mask_fields=[],
        img_shape=(40, 40, 3),
        gt_masks_ignore=PolygonMasks(polys_ignore, 40, 40),
        gt_masks=PolygonMasks(polys, 40, 40),
        gt_bboxes=np.array([[0, 0, 10, 10], [20, 0, 30, 10]]),
        gt_labels=np.array([0, 1]))

    dbnet_targets.generate_targets(results)

    expected_fields = ('gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask')
    for field in expected_fields:
        assert field in results['mask_fields']
def test_fcenet_generate_targets():
    """Check that ``FCENetTargets.generate_targets`` adds the p3-p5 maps."""
    fcenet_targets = textdet_targets.FCENetTargets(fourier_degree=5)

    height, width, channels = 64, 64, 3
    polys = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
             [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    polys_ignore = [[np.array([0, 0, 15, 0, 15, 10, 0, 10])]]
    results = dict(
        mask_fields=[],
        img_shape=(height, width, channels),
        gt_masks_ignore=PolygonMasks(polys_ignore, height, width),
        gt_masks=PolygonMasks(polys, height, width),
        gt_bboxes=np.array([[0, 0, 10, 10], [20, 0, 30, 10]]),
        gt_labels=np.array([0, 1]))

    fcenet_targets.generate_targets(results)

    for level_key in ('p3_maps', 'p4_maps', 'p5_maps'):
        assert level_key in results
def test_gen_drrg_targets():
    """Test the default attributes and target generation of ``DRRGTargets``.

    After checking the constructor defaults, targets are generated for:
    several text polygons, a generator capped by ``num_max_comps``, blank
    polygon masks, a single small polygon, and a generator configured with
    large width / half-height bounds.
    """
    # default attributes
    target_generator = textdet_targets.DRRGTargets()
    assert np.allclose(target_generator.orientation_thr, 2.0)
    assert np.allclose(target_generator.resample_step, 8.0)
    assert target_generator.num_min_comps == 9
    assert target_generator.num_max_comps == 600
    assert np.allclose(target_generator.min_width, 8.0)
    assert np.allclose(target_generator.max_width, 24.0)
    assert np.allclose(target_generator.center_region_shrink_ratio, 0.3)
    assert np.allclose(target_generator.comp_shrink_ratio, 1.0)
    assert np.allclose(target_generator.comp_w_h_ratio, 0.3)
    assert np.allclose(target_generator.text_comp_nms_thr, 0.25)
    assert np.allclose(target_generator.min_rand_half_height, 8.0)
    assert np.allclose(target_generator.max_rand_half_height, 24.0)
    assert np.allclose(target_generator.jitter_level, 0.2)

    # test generate_targets
    target_generator = textdet_targets.DRRGTargets(
        min_width=2.,
        max_width=4.,
        min_rand_half_height=3.,
        max_rand_half_height=5.)

    results = {}
    results['img'] = np.zeros((64, 64, 3), np.uint8)
    text_polys = [[np.array([4, 2, 30, 2, 30, 10, 4, 10])],
                  [np.array([36, 12, 8, 12, 8, 22, 36, 22])],
                  [np.array([48, 20, 52, 20, 52, 50, 48, 50])],
                  [np.array([44, 50, 38, 50, 38, 20, 44, 20])]]
    # NOTE(review): the masks are declared as 20x30 although the image and
    # img_shape are 64x64 and the polygons reach coordinates up to 52 --
    # confirm this size mismatch is intentional.
    results['gt_masks'] = PolygonMasks(text_polys, 20, 30)
    results['gt_masks_ignore'] = PolygonMasks([], 64, 64)
    results['img_shape'] = (64, 64, 3)
    results['mask_fields'] = []
    output = target_generator(results)
    # every generated map field holds exactly one mask
    assert len(output['gt_text_mask']) == 1
    assert len(output['gt_center_region_mask']) == 1
    assert len(output['gt_mask']) == 1
    assert len(output['gt_top_height_map']) == 1
    assert len(output['gt_bot_height_map']) == 1
    assert len(output['gt_sin_map']) == 1
    assert len(output['gt_cos_map']) == 1
    # each component attribute row has 8 entries
    assert output['gt_comp_attribs'].shape[-1] == 8

    # test generate_targets with the number of proposed text components exceeds
    # num_max_comps
    target_generator = textdet_targets.DRRGTargets(
        min_width=2.,
        max_width=4.,
        min_rand_half_height=3.,
        max_rand_half_height=5.,
        num_max_comps=6)
    output = target_generator(results)
    # the attribute array has exactly num_max_comps rows
    assert output['gt_comp_attribs'].ndim == 2
    assert output['gt_comp_attribs'].shape[0] == 6

    # test generate_targets with blank polygon masks
    target_generator = textdet_targets.DRRGTargets(
        min_width=2.,
        max_width=4.,
        min_rand_half_height=3.,
        max_rand_half_height=5.)
    results = {}
    results['img'] = np.zeros((20, 30, 3), np.uint8)
    results['gt_masks'] = PolygonMasks([], 20, 30)
    results['gt_masks_ignore'] = PolygonMasks([], 20, 30)
    results['img_shape'] = (20, 30, 3)
    results['mask_fields'] = []
    output = target_generator(results)
    # presumably element [0, 0] stores a component count that is padded up
    # past num_min_comps - 1 -- TODO confirm against DRRGTargets
    assert output['gt_comp_attribs'][0, 0] > 8

    # test generate_targets with one proposed text component
    text_polys = [[np.array([13, 6, 17, 6, 17, 14, 13, 14])]]
    target_generator = textdet_targets.DRRGTargets(
        min_width=4.,
        max_width=8.,
        min_rand_half_height=3.,
        max_rand_half_height=5.)
    # reuses the 20x30 ``results`` dict from the blank-mask case above
    results['gt_masks'] = PolygonMasks(text_polys, 20, 30)
    output = target_generator(results)
    assert output['gt_comp_attribs'][0, 0] > 8

    # test generate_targets with shrunk margin in generate_rand_comp_attribs
    target_generator = textdet_targets.DRRGTargets(
        min_width=2.,
        max_width=30.,
        min_rand_half_height=3.,
        max_rand_half_height=30.)
    output = target_generator(results)
    assert output['gt_comp_attribs'][0, 0] > 8

View File

@ -1,66 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import unittest.mock as mock
import numpy as np
import pytest
from mmocr.datasets.pipelines import (OneOfWrapper, RandomWrapper,
TorchVisionWrapper)
from mmocr.datasets.pipelines.transforms import ColorJitter
def test_torchvision_wrapper():
    """Test ``TorchVisionWrapper`` construction errors and a Grayscale call."""
    inputs = dict(img=np.ones((128, 100, 3), dtype=np.uint8))

    # object not found error
    with pytest.raises(Exception):
        TorchVisionWrapper(op='NonExist')
    # the ``op`` argument is mandatory
    with pytest.raises(TypeError):
        TorchVisionWrapper()

    to_gray = TorchVisionWrapper('Grayscale')
    # an input without the expected content is rejected
    with pytest.raises(AssertionError):
        to_gray({})

    outputs = to_gray(inputs)
    gray_shape = (128, 100)
    assert outputs['img'].shape == gray_shape
    assert outputs['img_shape'] == gray_shape
@mock.patch('random.choice')
def test_oneof(rand_choice):
    """Test that ``OneOfWrapper`` runs the candidate picked by random.choice.

    Args:
        rand_choice (mock.MagicMock): Patched ``random.choice``; its
            ``side_effect`` decides which candidate is returned.
    """
    jitter_cfg = dict(type='TorchVisionWrapper', op='ColorJitter')
    gray_cfg = dict(type='TorchVisionWrapper', op='Grayscale')
    inputs = {
        'img': np.random.randint(0, 256, size=(128, 100, 3), dtype=np.uint8)
    }
    one_of = OneOfWrapper([jitter_cfg, gray_cfg])

    # Return the first candidate: the output keeps its three channels.
    rand_choice.side_effect = lambda candidates: candidates[0]
    outputs = one_of(inputs)
    assert outputs['img'].shape == (128, 100, 3)

    # Return the second candidate: the output becomes two-dimensional.
    rand_choice.side_effect = lambda candidates: candidates[1]
    outputs = one_of(inputs)
    assert outputs['img'].shape == (128, 100)

    # Passing an already-built transform object next to a config; the mock
    # still returns the second candidate.
    one_of = OneOfWrapper([ColorJitter(), gray_cfg])
    outputs = one_of(inputs)
    assert outputs['img'].shape == (128, 100)

    # Test invalid inputs
    for invalid_transforms in (None, [], {}):
        with pytest.raises(AssertionError):
            OneOfWrapper(invalid_transforms)
@mock.patch('numpy.random.uniform')
def test_runwithprob(np_random_uniform):
    """Test ``RandomWrapper`` with mocked random draws and probability 0.5.

    Args:
        np_random_uniform (mock.MagicMock): Patched ``numpy.random.uniform``
            that yields 0.1 and then 0.9.
    """
    np_random_uniform.side_effect = [0.1, 0.9]
    gray_cfg = dict(type='TorchVisionWrapper', op='Grayscale')
    wrapper = RandomWrapper([gray_cfg], 0.5)
    image = np.random.randint(0, 256, size=(128, 100, 3), dtype=np.uint8)

    # First call: the output is two-dimensional; second call: the image
    # keeps its original three-channel shape.
    for expected_shape in ((128, 100), (128, 100, 3)):
        outputs = wrapper({'img': copy.deepcopy(image)})
        assert outputs['img'].shape == expected_shape

View File

@ -1,42 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import unittest.mock as mock
import numpy as np
import mmocr.datasets.pipelines.transforms as transforms
@mock.patch('%s.transforms.np.random.random_sample' % __name__)
def test_scale_aspect_jitter(mock_random):
    """Test range sampling and scale selection of ``ScaleAspectJitter``.

    Args:
        mock_random (mock.MagicMock): Patched ``np.random.random_sample`` as
            seen from the transforms module, so the draws are deterministic.
    """
    jitter = transforms.ScaleAspectJitter(
        img_scale=[(3000, 1000)],  # unused
        ratio_range=(0.5, 1.5),
        aspect_ratio_range=(1, 1),
        multiscale_mode='value',
        long_size_bound=2000,
        short_size_bound=640,
        resize_type='long_short_bound',
        keep_ratio=False)

    # test sample_from_range: a draw of 0.5 gives the midpoint of [100, 200]
    mock_random.side_effect = [0.5]
    sampled = jitter.sample_from_range([100, 200])
    assert sampled == 150

    # test _random_scale
    results = dict(img=np.zeros((4000, 1000)))
    mock_random.side_effect = [0.5, 1]
    jitter._random_scale(results)
    # The expected scale is 0.65 times the 1000 x 4000 (w, h) input size.
    assert results['scale'] == (650, 2600)

View File

@ -1,130 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import pytest
from mmdet.datasets import DATASETS
from mmocr.datasets import UniformConcatDataset
from mmocr.utils import list_from_file
def test_uniform_concat_dataset_pipeline():
    """Test how ``UniformConcatDataset`` assigns pipelines to its datasets.

    Covers a shared 1-D pipeline with ``force_apply``, ``pipeline=None`` with
    flat and nested dataset lists, and a 2-D list of pipelines.
    """
    pipeline1 = [dict(type='LoadImageFromFile')]
    pipeline2 = [dict(type='LoadImageFromFile'), dict(type='ColorJitter')]

    ann_file = 'tests/data/ocr_toy_dataset/label.txt'
    loader_cfg = dict(
        type='HardDiskLoader',
        repeat=1,
        parser=dict(
            type='LineStrParser',
            keys=['filename', 'text'],
            keys_idx=[0, 1],
            separator=' '))
    train1 = dict(
        type='OCRDataset',
        img_prefix='tests/data/ocr_toy_dataset/imgs',
        ann_file=ann_file,
        loader=loader_cfg,
        pipeline=None,
        test_mode=False)
    # Same config as train1, but with its own pipeline.
    train2 = dict(train1, pipeline=pipeline2)

    # pipeline is 1d list
    concat = UniformConcatDataset(
        datasets=[copy.deepcopy(train1), copy.deepcopy(train2)],
        pipeline=pipeline1,
        force_apply=True)
    assert len(concat) == 2 * len(list_from_file(ann_file))
    num_first = len(concat.datasets[0].pipeline.transforms)
    num_second = len(concat.datasets[1].pipeline.transforms)
    assert num_first == num_second

    # pipeline is None
    concat = UniformConcatDataset(
        datasets=[copy.deepcopy(train2)], pipeline=None)
    assert len(concat.datasets[0].pipeline.transforms) == len(pipeline2)

    # pipeline is None and the same config object appears in both groups
    shared_cfg = copy.deepcopy(train2)
    concat = UniformConcatDataset(
        datasets=[[shared_cfg], [shared_cfg]], pipeline=None)
    assert len(concat.datasets[0].pipeline.transforms) == len(pipeline2)

    # pipeline is 2d list
    concat = UniformConcatDataset(
        datasets=[[copy.deepcopy(train1)], [copy.deepcopy(train2)]],
        pipeline=[pipeline1, pipeline2])
    assert len(concat.datasets[0].pipeline.transforms) == len(pipeline1)
def test_uniform_concat_dataset_eval():
    """Test ``UniformConcatDataset.evaluate`` and its mean-score options.

    Dummy datasets that report their first result under the key ``n`` are
    registered in ``DATASETS`` so they can be built from config dicts.
    """

    @DATASETS.register_module()
    class DummyDataset:

        def __init__(self):
            self.CLASSES = 0
            self.ann_file = 'empty'

        def __len__(self):
            return 1

        def evaluate(self, res, logger, **kwargs):
            return dict(n=res[0])

    # Test 'auto'
    # a single dataset reports no mean score unless explicitly requested
    fake_inputs = [10]
    datasets = [dict(type='DummyDataset')]
    tmp_dataset = UniformConcatDataset(datasets)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['0_n'] == 10
    assert 'mean_n' not in results

    tmp_dataset = UniformConcatDataset(datasets, show_mean_scores=True)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['mean_n'] == 10

    # two datasets report a mean score by default
    fake_inputs = [10, 20]
    datasets = [dict(type='DummyDataset'), dict(type='DummyDataset')]
    tmp_dataset = UniformConcatDataset(datasets)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['0_n'] == 10
    assert results['1_n'] == 20
    assert results['mean_n'] == 15

    tmp_dataset = UniformConcatDataset(datasets, show_mean_scores=False)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['0_n'] == 10
    assert results['1_n'] == 20
    assert 'mean_n' not in results

    with pytest.raises(NotImplementedError):
        ds = UniformConcatDataset(datasets, separate_eval=False)
        ds.evaluate(fake_inputs)

    # Defined outside the ``raises`` block so that only building the
    # concatenated dataset can satisfy the expectation below.
    @DATASETS.register_module()
    class DummyDataset2:

        def __init__(self):
            self.CLASSES = 0
            self.ann_file = 'empty'

        def __len__(self):
            return 1

        def evaluate(self, res, logger, **kwargs):
            return dict(n=res[0])

    # mixing dataset types while requesting mean scores is rejected
    with pytest.raises(NotImplementedError):
        UniformConcatDataset(
            [dict(type='DummyDataset'),
             dict(type='DummyDataset2')],
            show_mean_scores=True)

View File

@ -1,72 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
import numpy as np
import pytest
from mmocr.core.evaluation.hmean import (eval_hmean, get_gt_masks,
output_ranklist)
def _create_dummy_ann_infos():
    """Build the annotation of one image with a kept and an ignored box.

    Returns:
        list[dict]: A single-element list. The dict holds ``bboxes`` and
        ``bboxes_ignore`` as float32 arrays, ``labels`` as an int64 array,
        and ``masks`` / ``masks_ignore`` as nested lists of polygon
        coordinates.
    """
    ann_info = dict(
        bboxes=np.array([[50., 70., 80., 100.]], dtype=np.float32),
        labels=np.array([1], dtype=np.int64),
        bboxes_ignore=np.array([[120, 140, 200, 200]], dtype=np.float32),
        masks=[[[50, 70, 80, 70, 80, 100, 50, 100]]],
        masks_ignore=[[[120, 140, 200, 140, 200, 200, 120, 200]]])
    return [ann_info]
def test_output_ranklist():
    """Test argument validation and result ordering of ``output_ranklist``."""
    # Local import: this test module does not import os.path at file level.
    import os.path as osp

    result = [{'hmean': 1}, {'hmean': 0.5}]
    img_infos = [{'file_name': 'sample1.jpg'}, {'file_name': 'sample2.jpg'}]

    # A temporary directory gives collision-free paths and removes anything
    # written under it when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = osp.join(tmp_dir, 'ranklist')
        json_file = file_name + '.json'

        # invalid results
        with pytest.raises(AssertionError):
            output_ranklist([[]], img_infos, json_file)
        # invalid image infos
        with pytest.raises(AssertionError):
            output_ranklist(result, [[]], json_file)
        # output path without the '.json' suffix
        with pytest.raises(AssertionError):
            output_ranklist(result, img_infos, file_name)

        sorted_outputs = output_ranklist(result, img_infos, json_file)

    # the entry with the lowest hmean comes first
    assert sorted_outputs[0]['hmean'] == 0.5
def test_get_gt_mask():
    """Check that ``get_gt_masks`` returns the kept and ignored polygons."""
    kept_masks, ignored_masks = get_gt_masks(_create_dummy_ann_infos())

    expected_kept = [[50, 70, 80, 70, 80, 100, 50, 100]]
    expected_ignored = [[120, 140, 200, 140, 200, 200, 120, 200]]
    assert np.allclose(kept_masks[0], expected_kept)
    assert np.allclose(ignored_masks[0], expected_ignored)
def test_eval_hmean():
    """Test argument validation and a perfect-match run of ``eval_hmean``."""
    metrics = {'hmean-iou', 'hmean-ic13'}
    boundaries = [[50, 70, 80, 70, 80, 100, 50, 100, 1],
                  [120, 140, 200, 140, 200, 200, 120, 200, 1]]
    results = [{'boundary_result': boundaries}]
    img_infos = [{'file_name': 'sample1.jpg'}]
    ann_infos = _create_dummy_ann_infos()

    # test invalid arguments: each case corrupts exactly one argument
    invalid_cases = (
        (results, [[]], ann_infos, metrics),
        (results, img_infos, [[]], metrics),
        ([[]], img_infos, ann_infos, metrics),
        (results, img_infos, ann_infos, 'hmean-iou'),
    )
    for bad_results, bad_imgs, bad_anns, bad_metrics in invalid_cases:
        with pytest.raises(AssertionError):
            eval_hmean(bad_results, bad_imgs, bad_anns, metrics=bad_metrics)

    eval_results = eval_hmean(results, img_infos, ann_infos, metrics=metrics)
    for metric_key in ('hmean-iou:hmean', 'hmean-ic13:hmean'):
        assert eval_results[metric_key] == 1

View File

@ -1,41 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
"""Test hmean_iou."""
import pytest
import mmocr.core.evaluation.hmean_iou as hmean_iou
def test_eval_hmean_iou():
    """Test argument validation and a perfect-match run of eval_hmean_iou."""
    pred_boxes = []
    gt_boxes = []
    gt_ignored_boxes = []
    iou_thr = 0.5
    precision_thr = 0.5

    # test invalid arguments: each case corrupts exactly one argument
    invalid_cases = (
        ([1], gt_boxes, gt_ignored_boxes, iou_thr, precision_thr),
        (pred_boxes, [1], gt_ignored_boxes, iou_thr, precision_thr),
        (pred_boxes, gt_boxes, [1], iou_thr, precision_thr),
        (pred_boxes, gt_boxes, gt_ignored_boxes, 1.1, precision_thr),
        (pred_boxes, gt_boxes, gt_ignored_boxes, iou_thr, 1.1),
    )
    for bad_args in invalid_cases:
        with pytest.raises(AssertionError):
            hmean_iou.eval_hmean_iou(*bad_args)

    # predictions identical to the ground truth score 1 on every measure
    boxes = [[0, 0, 1, 0, 1, 1, 0, 1], [2, 0, 3, 0, 3, 1, 2, 1]]
    pred_boxes = [[list(box) for box in boxes]]
    gt_boxes = [[list(box) for box in boxes]]
    gt_ignored_boxes = [[]]
    results = hmean_iou.eval_hmean_iou(pred_boxes, gt_boxes, gt_ignored_boxes,
                                       iou_thr, precision_thr)
    first_image_result = results[1][0]
    for measure in ('recall', 'precision', 'hmean'):
        assert first_image_result[measure] == 1