mirror of
https://github.com/open-mmlab/mmocr.git
synced 2025-06-03 21:54:47 +08:00
[Remove] remove unused datasets and pipelines
This commit is contained in:
parent
de78a8839f
commit
83aac48491
@ -1,5 +1,4 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .builder import LOADERS, PARSERS
|
||||
from .icdar_dataset import IcdarDataset
|
||||
from .ocr_dataset import OCRDataset
|
||||
from .ocr_seg_dataset import OCRSegDataset
|
||||
@ -9,6 +8,6 @@ from .recog_text_dataset import RecogTextDataset
|
||||
from .wildreceipt_dataset import WildReceiptDataset
|
||||
|
||||
__all__ = [
|
||||
'IcdarDataset', 'OCRDataset', 'OCRSegDataset', 'PARSERS', 'LOADERS',
|
||||
'RecogLMDBDataset', 'RecogTextDataset', 'WildReceiptDataset'
|
||||
'IcdarDataset', 'OCRDataset', 'OCRSegDataset', 'RecogLMDBDataset',
|
||||
'RecogTextDataset', 'WildReceiptDataset'
|
||||
]
|
||||
|
@ -1,6 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
from mmocr.registry import TRANSFORMS
|
||||
|
||||
# LOADERS and PARSERS are both bound to the same TRANSFORMS registry object.
LOADERS = TRANSFORMS
|
||||
PARSERS = TRANSFORMS
|
@ -1,7 +1,6 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .formatting import PackKIEInputs, PackTextDetInputs, PackTextRecogInputs
|
||||
from .loading import LoadKIEAnnotations, LoadOCRAnnotations
|
||||
from .ocr_seg_targets import OCRSegTargets
|
||||
from .ocr_transforms import (FancyPCA, NormalizeOCR, OnlineCropOCR,
|
||||
OpencvToPil, PilToOpencv, RandomPaddingOCR,
|
||||
RandomRotateImageBox, ResizeOCR, ToTensorOCR)
|
||||
@ -11,20 +10,16 @@ from .processing import (BoundedScaleAspectJitter, FixInvalidPolygon,
|
||||
ShortScaleAspectJitter, SourceImagePad,
|
||||
TextDetRandomCrop, TextDetRandomCropFlip)
|
||||
from .test_time_aug import MultiRotateAugOCR
|
||||
from .textdet_targets import (DBNetTargets, FCENetTargets, PANetTargets,
|
||||
TextSnakeTargets)
|
||||
from .transforms import ScaleAspectJitter
|
||||
from .wrappers import ImgAug, TorchVisionWrapper
|
||||
|
||||
__all__ = [
|
||||
'LoadOCRAnnotations', 'NormalizeOCR', 'OnlineCropOCR', 'ResizeOCR',
|
||||
'ToTensorOCR', 'DBNetTargets', 'PANetTargets', 'RandomRotate',
|
||||
'ScaleAspectJitter', 'MultiRotateAugOCR', 'OCRSegTargets', 'FancyPCA',
|
||||
'ToTensorOCR', 'RandomRotate', 'MultiRotateAugOCR', 'FancyPCA',
|
||||
'RandomPaddingOCR', 'ImgAug', 'RandomRotateImageBox', 'OpencvToPil',
|
||||
'PilToOpencv', 'SourceImagePad', 'TextSnakeTargets', 'FCENetTargets',
|
||||
'TextDetRandomCropFlip', 'PyramidRescale', 'TorchVisionWrapper', 'Resize',
|
||||
'RandomCrop', 'TextDetRandomCrop', 'RandomCrop', 'PackTextDetInputs',
|
||||
'PackTextRecogInputs', 'RescaleToHeight', 'PadToWidth',
|
||||
'ShortScaleAspectJitter', 'RandomFlip', 'BoundedScaleAspectJitter',
|
||||
'PackKIEInputs', 'LoadKIEAnnotations', 'FixInvalidPolygon'
|
||||
'PilToOpencv', 'SourceImagePad', 'TextDetRandomCropFlip', 'PyramidRescale',
|
||||
'TorchVisionWrapper', 'Resize', 'RandomCrop', 'TextDetRandomCrop',
|
||||
'RandomCrop', 'PackTextDetInputs', 'PackTextRecogInputs',
|
||||
'RescaleToHeight', 'PadToWidth', 'ShortScaleAspectJitter', 'RandomFlip',
|
||||
'BoundedScaleAspectJitter', 'PackKIEInputs', 'LoadKIEAnnotations',
|
||||
'FixInvalidPolygon'
|
||||
]
|
||||
|
@ -1,201 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import cv2
|
||||
import numpy as np
|
||||
from mmdet.core import BitmapMasks
|
||||
|
||||
import mmocr.utils.check_argument as check_argument
|
||||
from mmocr.registry import MODELS, TRANSFORMS
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class OCRSegTargets:
    """Generate gt shrunk kernels for segmentation based OCR framework.

    Args:
        label_convertor (dict): Dictionary to construct label_convertor
            to convert char to index. Must not be None.
        attn_shrink_ratio (float): The area shrunk ratio
            between attention kernels and gt text masks. Must lie in (0, 1).
        seg_shrink_ratio (float): The area shrunk ratio
            between segmentation kernels and gt text masks. Must lie in
            (0, 1).
        box_type (str): Character box type, should be either
            'char_rects' or 'char_quads', with 'char_rects'
            for rectangle with ``xyxy`` style and 'char_quads'
            for quadrangle with ``x1y1x2y2x3y3x4y4`` style.
        pad_val (int): Value written into the kernel maps for the columns
            to the right of the resized image width (within the resized
            image height), i.e. the horizontally padded area.
    """

    def __init__(self,
                 label_convertor=None,
                 attn_shrink_ratio=0.5,
                 seg_shrink_ratio=0.25,
                 box_type='char_rects',
                 pad_val=255):

        assert isinstance(attn_shrink_ratio, float)
        assert isinstance(seg_shrink_ratio, float)
        assert 0. < attn_shrink_ratio < 1.0
        assert 0. < seg_shrink_ratio < 1.0
        assert label_convertor is not None
        assert box_type in ('char_rects', 'char_quads')

        self.attn_shrink_ratio = attn_shrink_ratio
        self.seg_shrink_ratio = seg_shrink_ratio
        self.label_convertor = MODELS.build(label_convertor)
        self.box_type = box_type
        self.pad_val = pad_val

    def shrink_char_quad(self, char_quad, shrink_ratio):
        """Shrink char box in style of quadrangle.

        Every corner is moved towards its two neighbouring corners by
        ``shrink_ratio`` times the length of the shorter adjacent edge.

        Args:
            char_quad (list[float]): Char box with format
                [x1, y1, x2, y2, x3, y3, x4, y4].
            shrink_ratio (float): The area shrunk ratio
                between gt kernels and gt text masks.

        Returns:
            ndarray: The four shrunk corner points, one ``[x, y]`` pair
            per row, with coordinates rounded to integers.
        """
        points = [[char_quad[0], char_quad[1]], [char_quad[2], char_quad[3]],
                  [char_quad[4], char_quad[5]], [char_quad[6], char_quad[7]]]
        shrink_points = []
        for p_idx, point in enumerate(points):
            # Previous and next corner along the quadrangle boundary.
            p1 = points[(p_idx + 3) % 4]
            p2 = points[(p_idx + 1) % 4]

            dist1 = self.l2_dist_two_points(p1, point)
            dist2 = self.l2_dist_two_points(p2, point)
            min_dist = min(dist1, dist2)

            v1 = [p1[0] - point[0], p1[1] - point[1]]
            v2 = [p2[0] - point[0], p2[1] - point[1]]

            # min_dist != 0 implies both distances are non-zero, so the
            # divisions are safe; a degenerate corner is left unmoved.
            temp_dist1 = (shrink_ratio * min_dist /
                          dist1) if min_dist != 0 else 0.
            temp_dist2 = (shrink_ratio * min_dist /
                          dist2) if min_dist != 0 else 0.

            v1 = [temp * temp_dist1 for temp in v1]
            v2 = [temp * temp_dist2 for temp in v2]

            shrink_point = [
                round(point[0] + v1[0] + v2[0]),
                round(point[1] + v1[1] + v2[1])
            ]
            shrink_points.append(shrink_point)

        poly = np.array(shrink_points)

        return poly

    def shrink_char_rect(self, char_rect, shrink_ratio):
        """Shrink char box in style of rectangle.

        The rectangle keeps its centre while its width and height are
        multiplied by ``shrink_ratio``.

        Args:
            char_rect (list[float]): Char box with format
                [x_min, y_min, x_max, y_max].
            shrink_ratio (float): The area shrunk ratio
                between gt kernels and gt text masks.

        Returns:
            ndarray: The four corner points of the shrunk rectangle, one
            ``[x, y]`` pair per row, with coordinates rounded to integers.
        """
        x_min, y_min, x_max, y_max = char_rect
        w = x_max - x_min
        h = y_max - y_min
        x_min_s = round((x_min + x_max - w * shrink_ratio) / 2)
        y_min_s = round((y_min + y_max - h * shrink_ratio) / 2)
        x_max_s = round((x_min + x_max + w * shrink_ratio) / 2)
        y_max_s = round((y_min + y_max + h * shrink_ratio) / 2)
        poly = np.array([[x_min_s, y_min_s], [x_max_s, y_min_s],
                         [x_max_s, y_max_s], [x_min_s, y_max_s]])

        return poly

    def generate_kernels(self,
                         resize_shape,
                         pad_shape,
                         char_boxes,
                         char_inds,
                         shrink_ratio=0.5,
                         binary=True):
        """Generate char instance kernels for one shrink ratio.

        Args:
            resize_shape (tuple(int, int)): Image size (height, width)
                after resizing.
            pad_shape (tuple(int, int)): Image size (height, width)
                after padding.
            char_boxes (list[list[float]]): The list of char polygons.
            char_inds (list[int]): List of char indexes.
            shrink_ratio (float): The shrink ratio of kernel.
            binary (bool): If True, every char region is filled with 1;
                otherwise it is filled with the char's index.

        Returns:
            char_kernel (ndarray): The int32 kernel mask of shape
            ``pad_shape``; the area right of the resized width is set to
            ``pad_val``.
        """
        assert isinstance(resize_shape, tuple)
        assert isinstance(pad_shape, tuple)
        assert check_argument.is_2dlist(char_boxes)
        assert check_argument.is_type_list(char_inds, int)
        assert isinstance(shrink_ratio, float)
        assert isinstance(binary, bool)

        char_kernel = np.zeros(pad_shape, dtype=np.int32)
        char_kernel[:resize_shape[0], resize_shape[1]:] = self.pad_val

        for i, char_box in enumerate(char_boxes):
            if self.box_type == 'char_rects':
                poly = self.shrink_char_rect(char_box, shrink_ratio)
            elif self.box_type == 'char_quads':
                poly = self.shrink_char_quad(char_box, shrink_ratio)

            fill_value = 1 if binary else char_inds[i]
            cv2.fillConvexPoly(char_kernel, poly.astype(np.int32),
                               (fill_value))

        return char_kernel

    def l2_dist_two_points(self, p1, p2):
        """Return the Euclidean distance between 2D points p1 and p2."""
        return ((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5

    def __call__(self, results):
        """Build the attention, segmentation and padding masks.

        Args:
            results (dict): Must provide ``img_shape``, ``resize_shape``,
                ``pad_shape`` and ``ann_info`` (with the ``box_type`` boxes
                and ``chars``).

        Returns:
            dict: ``results`` with ``gt_kernels`` (binary attention kernel,
            index-valued segmentation kernel and padding mask) added and
            ``mask_fields`` set to ``['gt_kernels']``.
        """
        img_shape = results['img_shape']
        resize_shape = results['resize_shape']

        h_scale = 1.0 * resize_shape[0] / img_shape[0]
        w_scale = 1.0 * resize_shape[1] / img_shape[1]

        ann_info = results['ann_info']
        num_points = 2 if self.box_type == 'char_rects' else 4

        char_boxes, char_inds = [], []
        for i, char_box in enumerate(ann_info[self.box_type]):
            # Scale a copy: rescaling the annotation in place would compound
            # the scaling whenever the same ann_info is processed again.
            scaled_box = list(char_box)
            for j in range(num_points):
                scaled_box[j * 2] = round(scaled_box[j * 2] * w_scale)
                scaled_box[j * 2 + 1] = round(scaled_box[j * 2 + 1] * h_scale)
            char_boxes.append(scaled_box)
            char = ann_info['chars'][i]
            char_ind = self.label_convertor.str2idx([char])[0][0]
            char_inds.append(char_ind)

        resize_shape = tuple(results['resize_shape'][:2])
        pad_shape = tuple(results['pad_shape'][:2])
        binary_target = self.generate_kernels(
            resize_shape,
            pad_shape,
            char_boxes,
            char_inds,
            shrink_ratio=self.attn_shrink_ratio,
            binary=True)

        seg_target = self.generate_kernels(
            resize_shape,
            pad_shape,
            char_boxes,
            char_inds,
            shrink_ratio=self.seg_shrink_ratio,
            binary=False)

        # 1 everywhere except the padded columns right of the resized image.
        mask = np.ones(pad_shape, dtype=np.int32)
        mask[:resize_shape[0], resize_shape[1]:] = 0

        results['gt_kernels'] = BitmapMasks([binary_target, seg_target, mask],
                                            pad_shape[0], pad_shape[1])
        results['mask_fields'] = ['gt_kernels']

        return results
|
@ -1,13 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .base_textdet_targets import BaseTextDetTargets
|
||||
from .dbnet_targets import DBNetTargets
|
||||
from .drrg_targets import DRRGTargets
|
||||
from .fcenet_targets import FCENetTargets
|
||||
from .panet_targets import PANetTargets
|
||||
from .psenet_targets import PSENetTargets
|
||||
from .textsnake_targets import TextSnakeTargets
|
||||
|
||||
__all__ = [
|
||||
'BaseTextDetTargets', 'PANetTargets', 'PSENetTargets', 'DBNetTargets',
|
||||
'FCENetTargets', 'TextSnakeTargets', 'DRRGTargets'
|
||||
]
|
@ -1,168 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import sys
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pyclipper
|
||||
from mmcv.utils import print_log
|
||||
from shapely.geometry import Polygon as plg
|
||||
|
||||
import mmocr.utils.check_argument as check_argument
|
||||
|
||||
|
||||
class BaseTextDetTargets:
    """Generate text detector ground truths.

    Subclasses implement :meth:`generate_targets`; calling an instance
    forwards ``results`` to that method.
    """

    def __init__(self):
        pass

    def point2line(self, xs, ys, point_1, point_2):
        """Compute the distance from point to a line. This is adapted from
        https://github.com/MhLiao/DB.

        Args:
            xs (ndarray): The x coordinates of size hxw.
            ys (ndarray): The y coordinates of size hxw.
            point_1 (ndarray): The first point with shape 1x2.
            point_2 (ndarray): The second point with shape 1x2.

        Returns:
            result (ndarray): The distance matrix of size hxw.
        """
        # suppose a triangle with three edge abc with c=point_1 point_2
        # a^2
        a_square = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
        # b^2
        b_square = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
        # c^2
        c_square = np.square(point_1[0] - point_2[0]) + np.square(point_1[1] -
                                                                  point_2[1])
        # -cosC=(c^2-a^2-b^2)/2(ab); eps keeps the division finite when a
        # pixel coincides with an end point.
        neg_cos_c = (
            (c_square - a_square - b_square) /
            (np.finfo(np.float32).eps + 2 * np.sqrt(a_square * b_square)))
        # sinC^2=1-cosC^2
        square_sin = 1 - np.square(neg_cos_c)
        square_sin = np.nan_to_num(square_sin)
        # distance=a*b*sinC/c=a*h/c=2*area/c
        result = np.sqrt(a_square * b_square * square_sin /
                         (np.finfo(np.float32).eps + c_square))
        # set result to minimum edge if C<pi/2
        result[neg_cos_c < 0] = np.sqrt(np.fmin(a_square,
                                                b_square))[neg_cos_c < 0]
        return result

    def polygon_area(self, polygon):
        """Compute the polygon area. Please refer to Green's theorem.
        https://en.wikipedia.org/wiki/Green%27s_theorem. This is adapted from
        https://github.com/MhLiao/DB.

        Args:
            polygon (ndarray): The polygon boundary points.

        Returns:
            float: The signed area; its sign depends on the vertex order.
        """

        polygon = polygon.reshape(-1, 2)
        edge = 0
        for i in range(polygon.shape[0]):
            next_index = (i + 1) % polygon.shape[0]
            edge += (polygon[next_index, 0] - polygon[i, 0]) * (
                polygon[next_index, 1] + polygon[i, 1])

        return edge / 2.

    def polygon_size(self, polygon):
        """Estimate the height and width of the minimum bounding box of the
        polygon.

        Args:
            polygon (ndarray): The polygon point sequence.

        Returns:
            size (tuple): The size element of ``cv2.minAreaRect`` computed
            on the int32-cast points.
        """
        poly = polygon.reshape(-1, 2)
        rect = cv2.minAreaRect(poly.astype(np.int32))
        size = rect[1]
        return size

    def generate_kernels(self,
                         img_size,
                         text_polys,
                         shrink_ratio,
                         max_shrink=sys.maxsize,
                         ignore_tags=None):
        """Generate text instance kernels for one shrink ratio.

        Args:
            img_size (tuple(int, int)): The image size of (height, width).
            text_polys (list[list[ndarray]]: The list of text polygons.
            shrink_ratio (float): The shrink ratio of kernel.
            max_shrink (int): Upper bound of the shrink offset distance.
            ignore_tags (list[bool], optional): If given, the entry of every
                polygon that cannot be shrunk is set to True in place.

        Returns:
            tuple: ``(text_kernel, ignore_tags)`` where ``text_kernel`` is a
            float32 map of (height, width) in which the shrunk region of the
            i-th polygon is filled with ``i + 1``, and ``ignore_tags`` is the
            (possibly updated) input list.
        """
        assert isinstance(img_size, tuple)
        assert check_argument.is_2dlist(text_polys)
        assert isinstance(shrink_ratio, float)

        h, w = img_size
        text_kernel = np.zeros((h, w), dtype=np.float32)

        for text_ind, poly in enumerate(text_polys):
            instance = poly[0].reshape(-1, 2).astype(np.int32)
            area = plg(instance).area
            peri = cv2.arcLength(instance, True)
            distance = min(
                int(area * (1 - shrink_ratio * shrink_ratio) / (peri + 0.001) +
                    0.5), max_shrink)
            pco = pyclipper.PyclipperOffset()
            pco.AddPath(instance, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)
            # Keep the offset result as a plain list: it may hold several
            # paths of different lengths, and wrapping such a ragged list in
            # np.array raises ValueError on recent NumPy versions.
            shrunk_paths = pco.Execute(-distance)

            # nothing left after shrinking
            if len(shrunk_paths) == 0:
                if ignore_tags is not None:
                    ignore_tags[text_ind] = True
                continue
            try:
                shrunk = np.array(shrunk_paths[0]).reshape(-1, 2)

            except Exception as e:
                print_log(f'{shrunk_paths} with error {e}')
                if ignore_tags is not None:
                    ignore_tags[text_ind] = True
                continue
            # an empty first path cannot be drawn either
            if shrunk.size == 0:
                if ignore_tags is not None:
                    ignore_tags[text_ind] = True
                continue
            cv2.fillPoly(text_kernel, [shrunk.astype(np.int32)], text_ind + 1)
        return text_kernel, ignore_tags

    def generate_effective_mask(self, mask_size: tuple, polygons_ignore):
        """Generate effective mask by setting the ineffective regions to 0 and
        effective regions to 1.

        Args:
            mask_size (tuple): The mask size.
            polygons_ignore (list[[ndarray]]: The list of ignored text
                polygons.

        Returns:
            mask (ndarray): The effective mask of (height, width).
        """

        assert check_argument.is_2dlist(polygons_ignore)

        mask = np.ones(mask_size, dtype=np.uint8)

        for poly in polygons_ignore:
            instance = poly[0].reshape(-1,
                                       2).astype(np.int32).reshape(1, -1, 2)
            cv2.fillPoly(mask, instance, 0)

        return mask

    def generate_targets(self, results):
        """Generate the targets; must be implemented by subclasses."""
        raise NotImplementedError

    def __call__(self, results):
        """Return ``self.generate_targets(results)``."""
        results = self.generate_targets(results)
        return results
|
@ -1,250 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pyclipper
|
||||
from mmdet.core import BitmapMasks
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
from mmocr.registry import TRANSFORMS
|
||||
from . import BaseTextDetTargets
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class DBNetTargets(BaseTextDetTargets):
    """Generate gt shrunk text, gt threshold map, and their effective region
    masks to learn DBNet: Real-time Scene Text Detection with Differentiable
    Binarization [https://arxiv.org/abs/1911.08947]. This was partially adapted
    from https://github.com/MhLiao/DB.

    Args:
        shrink_ratio (float): The area shrunk ratio between text
            kernels and their text masks.
        thr_min (float): The minimum value of the threshold map.
        thr_max (float): The maximum value of the threshold map.
        min_short_size (int): The minimum size of polygon below which
            the polygon is invalid.
    """

    def __init__(self,
                 shrink_ratio=0.4,
                 thr_min=0.3,
                 thr_max=0.7,
                 min_short_size=8):
        super().__init__()
        self.shrink_ratio = shrink_ratio
        self.thr_min = thr_min
        self.thr_max = thr_max
        self.min_short_size = min_short_size

    def find_invalid(self, results):
        """Find invalid polygons.

        Args:
            results (dict): The dict containing gt_mask.

        Returns:
            ignore_tags (list[bool]): The indicators for ignoring polygons.
        """
        texts = results['gt_masks'].masks
        ignore_tags = [False] * len(texts)

        for idx, text in enumerate(texts):
            if self.invalid_polygon(text[0]):
                ignore_tags[idx] = True
        return ignore_tags

    def invalid_polygon(self, poly):
        """Judge the input polygon is invalid or not. It is invalid if its area
        smaller than 1 or the shorter side of its minimum bounding box smaller
        than min_short_size.

        Args:
            poly (ndarray): The polygon boundary point sequence.

        Returns:
            True/False (bool): Whether the polygon is invalid.
        """
        area = self.polygon_area(poly)
        # polygon_area is signed, so compare its magnitude.
        if abs(area) < 1:
            return True
        short_size = min(self.polygon_size(poly))
        if short_size < self.min_short_size:
            return True

        return False

    def ignore_texts(self, results, ignore_tags):
        """Ignore gt masks and gt_labels while padding gt_masks_ignore in
        results given ignore_tags.

        Args:
            results (dict): Result for one image.
            ignore_tags (list[bool]): Indicate whether to ignore its
                corresponding ground truth text.

        Returns:
            tuple: ``(results, new_ignore_tags)`` where ``results`` has the
            flagged masks moved from ``gt_masks`` to ``gt_masks_ignore`` and
            the flagged labels dropped, and ``new_ignore_tags`` holds the
            (all False) tags of the remaining texts.
        """
        flag_len = len(ignore_tags)
        assert flag_len == len(results['gt_masks'].masks)
        assert flag_len == len(results['gt_labels'])

        results['gt_masks_ignore'].masks += [
            mask for i, mask in enumerate(results['gt_masks'].masks)
            if ignore_tags[i]
        ]
        results['gt_masks'].masks = [
            mask for i, mask in enumerate(results['gt_masks'].masks)
            if not ignore_tags[i]
        ]
        results['gt_labels'] = np.array([
            mask for i, mask in enumerate(results['gt_labels'])
            if not ignore_tags[i]
        ])
        new_ignore_tags = [ignore for ignore in ignore_tags if not ignore]

        return results, new_ignore_tags

    def generate_thr_map(self, img_size, polygons):
        """Generate threshold map.

        Args:
            img_size (tuple(int)): The image size (h,w)
            polygons (list(ndarray)): The polygon list.

        Returns:
            thr_map (ndarray): The generated threshold map.
            thr_mask (ndarray): The effective mask of threshold map.
        """
        thr_map = np.zeros(img_size, dtype=np.float32)
        thr_mask = np.zeros(img_size, dtype=np.uint8)

        for polygon in polygons:
            self.draw_border_map(polygon[0], thr_map, mask=thr_mask)
        # Rescale the border map into [thr_min, thr_max].
        thr_map = thr_map * (self.thr_max - self.thr_min) + self.thr_min

        return thr_map, thr_mask

    def draw_border_map(self, polygon, canvas, mask):
        """Generate threshold map for one polygon.

        ``canvas`` and ``mask`` are updated in place; ``polygon`` is left
        untouched.

        Args:
            polygon(ndarray): The polygon boundary ndarray.
            canvas(ndarray): The generated threshold map.
            mask(ndarray): The generated threshold mask.
        """
        polygon = polygon.reshape(-1, 2)
        assert polygon.ndim == 2
        assert polygon.shape[1] == 2

        polygon_shape = Polygon(polygon)
        distance = (
            polygon_shape.area * (1 - np.power(self.shrink_ratio, 2)) /
            polygon_shape.length)
        subject = [tuple(p) for p in polygon]
        padding = pyclipper.PyclipperOffset()
        padding.AddPath(subject, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)
        padded_polygon = padding.Execute(distance)
        if len(padded_polygon) > 0:
            padded_polygon = np.array(padded_polygon[0])
        else:
            print(f'padding {polygon} with {distance} gets {padded_polygon}')
            padded_polygon = polygon.copy().astype(np.int32)

        x_min = padded_polygon[:, 0].min()
        x_max = padded_polygon[:, 0].max()
        y_min = padded_polygon[:, 1].min()
        y_max = padded_polygon[:, 1].max()

        # Skip polygons whose padded box lies completely outside the canvas.
        # The left/top test matches the original guard; the right/bottom test
        # prevents negative slice starts from wrapping around below.
        if (x_max < 0 or y_max < 0 or x_min > canvas.shape[1] - 1
                or y_min > canvas.shape[0] - 1):
            return

        width = x_max - x_min + 1
        height = y_max - y_min + 1

        # reshape() may return a view of the caller's array, so shift a copy
        # instead of modifying the input polygon in place.
        polygon = polygon.copy()
        polygon[:, 0] = polygon[:, 0] - x_min
        polygon[:, 1] = polygon[:, 1] - y_min

        xs = np.broadcast_to(
            np.linspace(0, width - 1, num=width).reshape(1, width),
            (height, width))
        ys = np.broadcast_to(
            np.linspace(0, height - 1, num=height).reshape(height, 1),
            (height, width))

        # Normalised distance of every pixel in the padded box to each edge;
        # the minimum over edges is the distance to the polygon border.
        distance_map = np.zeros((polygon.shape[0], height, width),
                                dtype=np.float32)
        for i in range(polygon.shape[0]):
            j = (i + 1) % polygon.shape[0]
            absolute_distance = self.point2line(xs, ys, polygon[i], polygon[j])
            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
        distance_map = distance_map.min(axis=0)

        # Clamp the padded box to the canvas.
        x_min_valid = min(max(0, x_min), canvas.shape[1] - 1)
        x_max_valid = min(max(0, x_max), canvas.shape[1] - 1)
        y_min_valid = min(max(0, y_min), canvas.shape[0] - 1)
        y_max_valid = min(max(0, y_max), canvas.shape[0] - 1)

        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
        # Keep the larger value where border maps of polygons overlap.
        canvas[y_min_valid:y_max_valid + 1,
               x_min_valid:x_max_valid + 1] = np.fmax(
                   1 - distance_map[y_min_valid - y_min:y_max_valid - y_max +
                                    height, x_min_valid - x_min:x_max_valid -
                                    x_max + width],
                   canvas[y_min_valid:y_max_valid + 1,
                          x_min_valid:x_max_valid + 1])

    def generate_targets(self, results):
        """Generate the gt targets for DBNet.

        Args:
            results (dict): The input result dictionary.

        Returns:
            results (dict): The output result dictionary, with ``gt_shrink``,
            ``gt_shrink_mask``, ``gt_thr`` and ``gt_thr_mask`` added as
            ``BitmapMasks`` and listed in ``mask_fields``.
        """
        assert isinstance(results, dict)

        if 'bbox_fields' in results:
            results['bbox_fields'].clear()

        ignore_tags = self.find_invalid(results)
        results, ignore_tags = self.ignore_texts(results, ignore_tags)

        h, w, _ = results['img_shape']
        polygons = results['gt_masks'].masks

        # generate gt_shrink_kernel
        gt_shrink, ignore_tags = self.generate_kernels((h, w),
                                                       polygons,
                                                       self.shrink_ratio,
                                                       ignore_tags=ignore_tags)

        # drop the texts that could not be shrunk
        results, ignore_tags = self.ignore_texts(results, ignore_tags)
        # generate gt_shrink_mask
        polygons_ignore = results['gt_masks_ignore'].masks
        gt_shrink_mask = self.generate_effective_mask((h, w), polygons_ignore)

        # generate gt_threshold and gt_threshold_mask
        polygons = results['gt_masks'].masks
        gt_thr, gt_thr_mask = self.generate_thr_map((h, w), polygons)

        results['mask_fields'].clear()  # rm gt_masks encoded by polygons
        results.pop('gt_labels', None)
        results.pop('gt_masks', None)
        results.pop('gt_bboxes', None)
        results.pop('gt_bboxes_ignore', None)

        mapping = {
            'gt_shrink': gt_shrink,
            'gt_shrink_mask': gt_shrink_mask,
            'gt_thr': gt_thr,
            'gt_thr_mask': gt_thr_mask
        }
        for key, value in mapping.items():
            value = value if isinstance(value, list) else [value]
            results[key] = BitmapMasks(value, h, w)
            results['mask_fields'].append(key)

        return results
|
@ -1,534 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import cv2
|
||||
import numpy as np
|
||||
from lanms import merge_quadrangle_n9 as la_nms
|
||||
from mmdet.core import BitmapMasks
|
||||
from numpy.linalg import norm
|
||||
|
||||
import mmocr.utils.check_argument as check_argument
|
||||
from mmocr.registry import TRANSFORMS
|
||||
from .textsnake_targets import TextSnakeTargets
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
|
||||
class DRRGTargets(TextSnakeTargets):
|
||||
"""Generate the ground truth targets of DRRG: Deep Relational Reasoning
|
||||
Graph Network for Arbitrary Shape Text Detection.
|
||||
|
||||
[https://arxiv.org/abs/2003.07493]. This code was partially adapted from
|
||||
https://github.com/GXYM/DRRG licensed under the MIT license.
|
||||
|
||||
Args:
|
||||
orientation_thr (float): The threshold for distinguishing between
|
||||
head edge and tail edge among the horizontal and vertical edges
|
||||
of a quadrangle.
|
||||
resample_step (float): The step size for resampling the text center
|
||||
line.
|
||||
num_min_comps (int): The minimum number of text components, which
|
||||
should be larger than k_hop1 mentioned in paper.
|
||||
num_max_comps (int): The maximum number of text components.
|
||||
min_width (float): The minimum width of text components.
|
||||
max_width (float): The maximum width of text components.
|
||||
center_region_shrink_ratio (float): The shrink ratio of text center
|
||||
regions.
|
||||
comp_shrink_ratio (float): The shrink ratio of text components.
|
||||
comp_w_h_ratio (float): The width to height ratio of text components.
|
||||
min_rand_half_height(float): The minimum half-height of random text
|
||||
components.
|
||||
max_rand_half_height (float): The maximum half-height of random
|
||||
text components.
|
||||
jitter_level (float): The jitter level of text component geometric
|
||||
features.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
orientation_thr=2.0,
|
||||
resample_step=8.0,
|
||||
num_min_comps=9,
|
||||
num_max_comps=600,
|
||||
min_width=8.0,
|
||||
max_width=24.0,
|
||||
center_region_shrink_ratio=0.3,
|
||||
comp_shrink_ratio=1.0,
|
||||
comp_w_h_ratio=0.3,
|
||||
text_comp_nms_thr=0.25,
|
||||
min_rand_half_height=8.0,
|
||||
max_rand_half_height=24.0,
|
||||
jitter_level=0.2):
|
||||
|
||||
super().__init__()
|
||||
self.orientation_thr = orientation_thr
|
||||
self.resample_step = resample_step
|
||||
self.num_max_comps = num_max_comps
|
||||
self.num_min_comps = num_min_comps
|
||||
self.min_width = min_width
|
||||
self.max_width = max_width
|
||||
self.center_region_shrink_ratio = center_region_shrink_ratio
|
||||
self.comp_shrink_ratio = comp_shrink_ratio
|
||||
self.comp_w_h_ratio = comp_w_h_ratio
|
||||
self.text_comp_nms_thr = text_comp_nms_thr
|
||||
self.min_rand_half_height = min_rand_half_height
|
||||
self.max_rand_half_height = max_rand_half_height
|
||||
self.jitter_level = jitter_level
|
||||
|
||||
def dist_point2line(self, point, line):
|
||||
|
||||
assert isinstance(line, tuple)
|
||||
point1, point2 = line
|
||||
d = abs(np.cross(point2 - point1, point - point1)) / (
|
||||
norm(point2 - point1) + 1e-8)
|
||||
return d
|
||||
|
||||
def draw_center_region_maps(self, top_line, bot_line, center_line,
|
||||
center_region_mask, top_height_map,
|
||||
bot_height_map, sin_map, cos_map,
|
||||
region_shrink_ratio):
|
||||
"""Draw attributes of text components on text center regions.
|
||||
|
||||
Args:
|
||||
top_line (ndarray): The points composing the top side lines of text
|
||||
polygons.
|
||||
bot_line (ndarray): The points composing bottom side lines of text
|
||||
polygons.
|
||||
center_line (ndarray): The points composing the center lines of
|
||||
text instances.
|
||||
center_region_mask (ndarray): The text center region mask.
|
||||
top_height_map (ndarray): The map on which the distance from points
|
||||
to top side lines will be drawn for each pixel in text center
|
||||
regions.
|
||||
bot_height_map (ndarray): The map on which the distance from points
|
||||
to bottom side lines will be drawn for each pixel in text
|
||||
center regions.
|
||||
sin_map (ndarray): The map of vector_sin(top_point - bot_point)
|
||||
that will be drawn on text center regions.
|
||||
cos_map (ndarray): The map of vector_cos(top_point - bot_point)
|
||||
will be drawn on text center regions.
|
||||
region_shrink_ratio (float): The shrink ratio of text center
|
||||
regions.
|
||||
"""
|
||||
|
||||
assert top_line.shape == bot_line.shape == center_line.shape
|
||||
assert (center_region_mask.shape == top_height_map.shape ==
|
||||
bot_height_map.shape == sin_map.shape == cos_map.shape)
|
||||
assert isinstance(region_shrink_ratio, float)
|
||||
|
||||
h, w = center_region_mask.shape
|
||||
for i in range(0, len(center_line) - 1):
|
||||
|
||||
top_mid_point = (top_line[i] + top_line[i + 1]) / 2
|
||||
bot_mid_point = (bot_line[i] + bot_line[i + 1]) / 2
|
||||
|
||||
sin_theta = self.vector_sin(top_mid_point - bot_mid_point)
|
||||
cos_theta = self.vector_cos(top_mid_point - bot_mid_point)
|
||||
|
||||
tl = center_line[i] + (top_line[i] -
|
||||
center_line[i]) * region_shrink_ratio
|
||||
tr = center_line[i + 1] + (
|
||||
top_line[i + 1] - center_line[i + 1]) * region_shrink_ratio
|
||||
br = center_line[i + 1] + (
|
||||
bot_line[i + 1] - center_line[i + 1]) * region_shrink_ratio
|
||||
bl = center_line[i] + (bot_line[i] -
|
||||
center_line[i]) * region_shrink_ratio
|
||||
current_center_box = np.vstack([tl, tr, br, bl]).astype(np.int32)
|
||||
|
||||
cv2.fillPoly(center_region_mask, [current_center_box], color=1)
|
||||
cv2.fillPoly(sin_map, [current_center_box], color=sin_theta)
|
||||
cv2.fillPoly(cos_map, [current_center_box], color=cos_theta)
|
||||
|
||||
current_center_box[:, 0] = np.clip(current_center_box[:, 0], 0,
|
||||
w - 1)
|
||||
current_center_box[:, 1] = np.clip(current_center_box[:, 1], 0,
|
||||
h - 1)
|
||||
min_coord = np.min(current_center_box, axis=0).astype(np.int32)
|
||||
max_coord = np.max(current_center_box, axis=0).astype(np.int32)
|
||||
current_center_box = current_center_box - min_coord
|
||||
box_sz = (max_coord - min_coord + 1)
|
||||
|
||||
center_box_mask = np.zeros((box_sz[1], box_sz[0]), dtype=np.uint8)
|
||||
cv2.fillPoly(center_box_mask, [current_center_box], color=1)
|
||||
|
||||
inds = np.argwhere(center_box_mask > 0)
|
||||
inds = inds + (min_coord[1], min_coord[0])
|
||||
inds_xy = np.fliplr(inds)
|
||||
top_height_map[(inds[:, 0], inds[:, 1])] = self.dist_point2line(
|
||||
inds_xy, (top_line[i], top_line[i + 1]))
|
||||
bot_height_map[(inds[:, 0], inds[:, 1])] = self.dist_point2line(
|
||||
inds_xy, (bot_line[i], bot_line[i + 1]))
|
||||
|
||||
def generate_center_mask_attrib_maps(self, img_size, text_polys):
    """Build the text center region mask and its geometric attribute maps.

    For every polygon the two side lines are resampled, averaged into a
    center line, oriented, trimmed at both ends and finally rasterised by
    ``draw_center_region_maps`` into the shared output maps.

    Args:
        img_size (tuple): The image size (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons.

    Returns:
        center_lines (list): The list of text center lines.
        center_region_mask (ndarray): The text center region mask.
        top_height_map (ndarray): The map on which the distance from points
            to top side lines will be drawn for each pixel in text center
            regions.
        bot_height_map (ndarray): The map on which the distance from points
            to bottom side lines will be drawn for each pixel in text
            center regions.
        sin_map (ndarray): The sin(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
        cos_map (ndarray): The cos(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
    """

    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    img_h, img_w = img_size
    map_shape = (img_h, img_w)

    center_lines = []
    center_region_mask = np.zeros(map_shape, np.uint8)
    top_height_map, bot_height_map, sin_map, cos_map = (
        np.zeros(map_shape, dtype=np.float32) for _ in range(4))

    def end_shrink_count(top_pt, bot_pt):
        # Number of resampled points to drop at one end of the center line:
        # half of the clipped component width, measured in resample steps.
        shrink_len = np.clip(
            norm(top_pt - bot_pt) * self.comp_w_h_ratio, self.min_width,
            self.max_width) / 2
        return int(shrink_len // self.resample_step)

    for poly in text_polys:
        assert len(poly) == 1
        points = poly[0].reshape(-1, 2)
        _, _, top_line, bot_line = self.reorder_poly_edge(points)
        top_pts, bot_pts = self.resample_sidelines(top_line, bot_line,
                                                   self.resample_step)
        bot_pts = bot_pts[::-1]
        center_line = (top_pts + bot_pts) / 2

        # Steep lines (slope > 2) are ordered along y, all others along x;
        # a negative span on that axis means the line must be reversed.
        span = center_line[-1] - center_line[0]
        axis = 1 if self.vector_slope(span) > 2 else 0
        if span[axis] < 0:
            center_line = center_line[::-1]
            top_pts = top_pts[::-1]
            bot_pts = bot_pts[::-1]

        n_head = end_shrink_count(top_line[0], bot_line[0])
        n_tail = end_shrink_count(top_line[-1], bot_line[-1])
        # Trim only when more than two points survive the trimming.
        if len(center_line) > n_head + n_tail + 2:
            center_line = center_line[n_head:len(center_line) - n_tail]
            top_pts = top_pts[n_head:len(top_pts) - n_tail]
            bot_pts = bot_pts[n_head:len(bot_pts) - n_tail]
        center_lines.append(center_line.astype(np.int32))

        self.draw_center_region_maps(top_pts, bot_pts, center_line,
                                     center_region_mask, top_height_map,
                                     bot_height_map, sin_map, cos_map,
                                     self.center_region_shrink_ratio)

    return (center_lines, center_region_mask, top_height_map,
            bot_height_map, sin_map, cos_map)
|
||||
|
||||
def generate_rand_comp_attribs(self, num_rand_comps, center_sample_mask):
    """Sample random text components and their attributes.

    They are used to ensure the number of text components in an image is
    larger than k_hop1, which is the number of one hop neighbors in KNN
    graph.

    Args:
        num_rand_comps (int): The number of random text components.
        center_sample_mask (ndarray): The region mask for sampling text
            component centers.

    Returns:
        rand_comp_attribs (ndarray): The random text component attributes
            (x, y, h, w, cos, sin, comp_label=0).
    """

    assert isinstance(num_rand_comps, int)
    assert num_rand_comps > 0
    assert center_sample_mask.ndim == 2

    mask_h, mask_w = center_sample_mask.shape
    short_side = min(mask_h, mask_w)

    def sampling_margin(half_height):
        # Half diagonal of the largest possible component, plus one pixel.
        full_height = half_height * 2
        full_width = np.clip(full_height * self.comp_w_h_ratio,
                             self.min_width, self.max_width)
        return int(
            np.sqrt((full_height / 2)**2 + (full_width / 2)**2)) + 1

    half_h_max = self.max_rand_half_height
    half_h_min = self.min_rand_half_height
    margin = sampling_margin(half_h_max)

    if 2 * margin + 1 > short_side:
        # The configured heights leave no interior to sample from, so
        # derive smaller height bounds from the image's short side.
        assert short_side > (np.sqrt(2) * (self.min_width + 1))
        half_h_max = max(short_side / 4, self.min_width / 2 + 1)
        half_h_min = max(half_h_max / 4, self.min_width / 2)
        margin = sampling_margin(half_h_max)

    # Keep only mask pixels at least ``margin`` away from the border, then
    # erode so that sampled centers stay clear of the mask boundary.
    interior = (slice(margin, mask_h - margin),
                slice(margin, mask_w - margin))
    inner_mask = np.zeros_like(center_sample_mask)
    inner_mask[interior] = center_sample_mask[interior]
    kernel_size = int(np.clip(half_h_max, 7, 21))
    erode_kernel = np.ones((kernel_size, kernel_size), np.uint8)
    inner_mask = cv2.erode(inner_mask, erode_kernel)

    candidates = np.argwhere(inner_mask > 0)
    picked = np.random.choice(len(candidates), num_rand_comps)
    rand_centers = candidates[picked]
    column_shape = (len(rand_centers), 1)

    rand_top_height = np.random.randint(
        half_h_min, half_h_max, size=column_shape)
    rand_bot_height = np.random.randint(
        half_h_min, half_h_max, size=column_shape)

    # Draw a random direction and rescale it to (almost) unit length.
    rand_cos = 2 * np.random.random(size=column_shape) - 1
    rand_sin = 2 * np.random.random(size=column_shape) - 1
    unit_scale = np.sqrt(1.0 / (rand_cos**2 + rand_sin**2 + 1e-8))
    rand_cos = rand_cos * unit_scale
    rand_sin = rand_sin * unit_scale

    height = rand_top_height + rand_bot_height
    width = np.clip(height * self.comp_w_h_ratio, self.min_width,
                    self.max_width)

    # argwhere yields (row, col); flip the columns to store (x, y).
    columns = [
        rand_centers[:, ::-1], height, width, rand_cos, rand_sin,
        np.zeros_like(rand_sin)
    ]
    return np.hstack(columns).astype(np.float32)
|
||||
|
||||
def jitter_comp_attribs(self, comp_attribs, jitter_level):
    """Jitter text components attributes.

    Position, size and orientation of every component are perturbed with
    uniform noise scaled by ``jitter_level``; the orientation is then
    rescaled so that cos**2 + sin**2 is (almost) 1 again. The component
    labels are passed through untouched.

    The input array is left unmodified: all in-place updates are applied
    to a private copy.

    Args:
        comp_attribs (ndarray): The text component attributes, one row per
            component with columns (x, y, h, w, cos, sin, comp_label).
        jitter_level (float): The jitter level of text components
            attributes.

    Returns:
        jittered_comp_attribs (ndarray): The jittered text component
            attributes (x, y, h, w, cos, sin, comp_label).
    """

    assert comp_attribs.shape[1] == 7
    assert comp_attribs.shape[0] > 0
    assert isinstance(jitter_level, float)

    # The column slices below are views, so the ``+=`` updates would write
    # straight through to the caller's array without this copy.
    attribs = comp_attribs.copy()
    num_comps = len(attribs)

    x = attribs[:, 0].reshape((-1, 1))
    y = attribs[:, 1].reshape((-1, 1))
    h = attribs[:, 2].reshape((-1, 1))
    w = attribs[:, 3].reshape((-1, 1))
    cos = attribs[:, 4].reshape((-1, 1))
    sin = attribs[:, 5].reshape((-1, 1))
    comp_labels = attribs[:, 6].reshape((-1, 1))

    def centered_noise():
        # Uniform noise in [-0.5, 0.5), one value per component.
        return np.random.random(size=(num_comps, 1)) - 0.5

    # The center offsets use the not-yet-jittered size and orientation.
    x += centered_noise() * (h * np.abs(cos) + w * np.abs(sin)) * jitter_level
    y += centered_noise() * (h * np.abs(sin) + w * np.abs(cos)) * jitter_level

    h += centered_noise() * h * jitter_level
    w += centered_noise() * w * jitter_level

    cos += centered_noise() * 2 * jitter_level
    sin += centered_noise() * 2 * jitter_level

    # Re-normalise the jittered direction vector.
    scale = np.sqrt(1.0 / (cos**2 + sin**2 + 1e-8))
    cos = cos * scale
    sin = sin * scale

    jittered_comp_attribs = np.hstack([x, y, h, w, cos, sin, comp_labels])

    return jittered_comp_attribs
|
||||
|
||||
def generate_comp_attribs(self, center_lines, text_mask,
                          center_region_mask, top_height_map,
                          bot_height_map, sin_map, cos_map):
    """Generate text component attributes.

    Candidate components are placed on every center-line pixel inside the
    center region, thinned with ``la_nms``, described by
    (x, y, h, w, cos, sin, comp_label), jittered, topped up with random
    components when fewer than ``num_min_comps`` remain, and finally
    written into a zero-padded array of ``num_max_comps`` rows.

    Args:
        center_lines (list[ndarray]): The list of text center lines.
        text_mask (ndarray): The text region mask.
        center_region_mask (ndarray): The text center region mask.
        top_height_map (ndarray): The map on which the distance from points
            to top side lines will be drawn for each pixel in text center
            regions.
        bot_height_map (ndarray): The map on which the distance from points
            to bottom side lines will be drawn for each pixel in text
            center regions.
        sin_map (ndarray): The sin(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
        cos_map (ndarray): The cos(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).

    Returns:
        pad_comp_attribs (ndarray): The padded text component attributes
            of a fixed size. The first column holds the number of
            components, the remaining columns the component attributes.
    """

    assert isinstance(center_lines, list)
    assert (text_mask.shape == center_region_mask.shape ==
            top_height_map.shape == bot_height_map.shape == sin_map.shape
            == cos_map.shape)

    # Center-line pixels that also lie inside the center region.
    line_mask = np.zeros_like(center_region_mask)
    cv2.polylines(line_mask, center_lines, 0, 1, 1)
    line_mask = line_mask * center_region_mask
    centers_yx = np.argwhere(line_mask > 0)
    rows = centers_yx[:, 0]
    cols = centers_yx[:, 1]

    def column(values):
        return values.reshape((-1, 1))

    top_h = column(top_height_map[rows, cols]) * self.comp_shrink_ratio
    bot_h = column(bot_height_map[rows, cols]) * self.comp_shrink_ratio
    sin = column(sin_map[rows, cols])
    cos = column(cos_map[rows, cols])

    # Mid points of the top / bottom edges, still in (y, x) order.
    top_mid_yx = centers_yx + np.hstack([top_h * sin, top_h * cos])
    bot_mid_yx = centers_yx - np.hstack([bot_h * sin, bot_h * cos])

    comp_width = np.clip((top_h + bot_h) * self.comp_w_h_ratio,
                         self.min_width, self.max_width)
    half_w = comp_width / 2

    # Offset from an edge mid point to its corner, in (x, y) order.
    corner_offset = np.hstack([-half_w * sin, half_w * cos])
    top_mid_xy = top_mid_yx[:, ::-1]
    bot_mid_xy = bot_mid_yx[:, ::-1]
    corners = [
        top_mid_xy - corner_offset,  # tl
        top_mid_xy + corner_offset,  # tr
        bot_mid_xy + corner_offset,  # br
        bot_mid_xy - corner_offset,  # bl
    ]
    text_comps = np.hstack(corners).astype(np.float32)

    # la_nms is given a ninth column with a constant score of 1.
    scores = np.ones((text_comps.shape[0], 1), dtype=np.float32)
    text_comps = la_nms(
        np.hstack([text_comps, scores]), self.text_comp_nms_thr)

    if text_comps.shape[0] < 1:
        # Nothing survived: fall back to purely random components.
        comp_attribs = self.generate_rand_comp_attribs(
            self.num_min_comps, 1 - text_mask)
    else:
        img_h, img_w = center_region_mask.shape
        text_comps[:, 0:8:2] = np.clip(text_comps[:, 0:8:2], 0, img_w - 1)
        text_comps[:, 1:8:2] = np.clip(text_comps[:, 1:8:2], 0, img_h - 1)

        quads = text_comps[:, 0:8].reshape((-1, 4, 2))
        centers_xy = np.mean(quads, axis=1).astype(np.int32)
        cx = centers_xy[:, 0]
        cy = centers_xy[:, 1]

        height = column(top_height_map[cy, cx] + bot_height_map[cy, cx])
        width = np.clip(height * self.comp_w_h_ratio, self.min_width,
                        self.max_width)
        cos = column(cos_map[cy, cx])
        sin = column(sin_map[cy, cx])

        # Label each component with the connected center region it is in.
        _, label_mask = cv2.connectedComponents(
            center_region_mask, connectivity=8)
        comp_labels = column(label_mask[cy, cx]).astype(np.float32)

        cx = column(cx).astype(np.float32)
        cy = column(cy).astype(np.float32)
        comp_attribs = np.hstack(
            [cx, cy, height, width, cos, sin, comp_labels])
        comp_attribs = self.jitter_comp_attribs(comp_attribs,
                                                self.jitter_level)

        missing = self.num_min_comps - comp_attribs.shape[0]
        if missing > 0:
            rand_comp_attribs = self.generate_rand_comp_attribs(
                missing, 1 - text_mask)
            comp_attribs = np.vstack([comp_attribs, rand_comp_attribs])

    # Prepend the component count as the first column of every row.
    total = comp_attribs.shape[0]
    count_col = np.ones((total, 1), dtype=np.float32) * total
    comp_attribs = np.hstack([count_col, comp_attribs])

    if comp_attribs.shape[0] > self.num_max_comps:
        comp_attribs = comp_attribs[:self.num_max_comps, :]
        comp_attribs[:, 0] = self.num_max_comps

    pad_comp_attribs = np.zeros(
        (self.num_max_comps, comp_attribs.shape[1]), dtype=np.float32)
    pad_comp_attribs[:comp_attribs.shape[0], :] = comp_attribs

    return pad_comp_attribs
|
||||
|
||||
def generate_targets(self, results):
    """Generate the gt targets for DRRG.

    The polygon masks in ``results`` are turned into dense maps, which are
    stored as ``BitmapMasks`` and registered in ``results['mask_fields']``;
    the component attributes are stored under ``'gt_comp_attribs'``.

    Args:
        results (dict): The input result dictionary.

    Returns:
        results (dict): The output result dictionary.
    """

    assert isinstance(results, dict)

    text_polys = results['gt_masks'].masks
    ignore_polys = results['gt_masks_ignore'].masks

    h, w, _ = results['img_shape']
    img_size = (h, w)

    gt_text_mask = self.generate_text_region_mask(img_size, text_polys)
    gt_mask = self.generate_effective_mask(img_size, ignore_polys)
    attrib_maps = self.generate_center_mask_attrib_maps(
        img_size, text_polys)
    (center_lines, gt_center_region_mask, gt_top_height_map,
     gt_bot_height_map, gt_sin_map, gt_cos_map) = attrib_maps

    gt_comp_attribs = self.generate_comp_attribs(
        center_lines, gt_text_mask, gt_center_region_mask,
        gt_top_height_map, gt_bot_height_map, gt_sin_map, gt_cos_map)

    results['mask_fields'].clear()  # rm gt_masks encoded by polygons
    dense_targets = [
        ('gt_text_mask', gt_text_mask),
        ('gt_center_region_mask', gt_center_region_mask),
        ('gt_mask', gt_mask),
        ('gt_top_height_map', gt_top_height_map),
        ('gt_bot_height_map', gt_bot_height_map),
        ('gt_sin_map', gt_sin_map),
        ('gt_cos_map', gt_cos_map),
    ]
    for field, target in dense_targets:
        # BitmapMasks is handed a list of maps; wrap single maps.
        if not isinstance(target, list):
            target = [target]
        results[field] = BitmapMasks(target, h, w)
        results['mask_fields'].append(field)

    results['gt_comp_attribs'] = gt_comp_attribs
    return results
|
@ -1,361 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import cv2
|
||||
import numpy as np
|
||||
from numpy.fft import fft
|
||||
from numpy.linalg import norm
|
||||
|
||||
import mmocr.utils.check_argument as check_argument
|
||||
from mmocr.registry import TRANSFORMS
|
||||
from .textsnake_targets import TextSnakeTargets
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class FCENetTargets(TextSnakeTargets):
    """Generate the ground truth targets of FCENet: Fourier Contour Embedding
    for Arbitrary-Shaped Text Detection.

    [https://arxiv.org/abs/2104.10442]

    Args:
        fourier_degree (int): The maximum Fourier transform degree k.
        resample_step (float): The step size for resampling the text center
            line (TCL). It's better not to exceed half of the minimum width.
        center_region_shrink_ratio (float): The shrink ratio of text center
            region.
        level_size_divisors (tuple(int)): The downsample ratio on each level.
        level_proportion_range (tuple(tuple(int))): The range of text sizes
            assigned to each level.
    """

    def __init__(self,
                 fourier_degree=5,
                 resample_step=4.0,
                 center_region_shrink_ratio=0.3,
                 level_size_divisors=(8, 16, 32),
                 level_proportion_range=((0, 0.4), (0.3, 0.7), (0.6, 1.0))):

        super().__init__()
        assert isinstance(level_size_divisors, tuple)
        assert isinstance(level_proportion_range, tuple)
        assert len(level_size_divisors) == len(level_proportion_range)
        self.fourier_degree = fourier_degree
        self.resample_step = resample_step
        self.center_region_shrink_ratio = center_region_shrink_ratio
        self.level_size_divisors = level_size_divisors
        self.level_proportion_range = level_proportion_range

    def generate_center_region_mask(self, img_size, text_polys):
        """Generate text center region mask.

        Args:
            img_size (tuple): The image size of (height, width).
            text_polys (list[list[ndarray]]): The list of text polygons.

        Returns:
            center_region_mask (ndarray): The text center region mask.
        """

        assert isinstance(img_size, tuple)
        assert check_argument.is_2dlist(text_polys)

        h, w = img_size

        center_region_mask = np.zeros((h, w), np.uint8)
        shrink_ratio = self.center_region_shrink_ratio

        center_region_boxes = []
        for poly in text_polys:
            assert len(poly) == 1
            polygon_points = poly[0].reshape(-1, 2)
            _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
            resampled_top_line, resampled_bot_line = self.resample_sidelines(
                top_line, bot_line, self.resample_step)
            resampled_bot_line = resampled_bot_line[::-1]
            center_line = (resampled_top_line + resampled_bot_line) / 2

            # Trim a quarter of the local text height from both ends of the
            # center line, measured in resample steps.
            line_head_shrink_len = norm(resampled_top_line[0] -
                                        resampled_bot_line[0]) / 4.0
            line_tail_shrink_len = norm(resampled_top_line[-1] -
                                        resampled_bot_line[-1]) / 4.0
            head_shrink_num = int(line_head_shrink_len // self.resample_step)
            tail_shrink_num = int(line_tail_shrink_len // self.resample_step)
            # Only trim when more than two points remain afterwards.
            if len(center_line) > head_shrink_num + tail_shrink_num + 2:
                center_line = center_line[head_shrink_num:len(center_line) -
                                          tail_shrink_num]
                resampled_top_line = resampled_top_line[
                    head_shrink_num:len(resampled_top_line) - tail_shrink_num]
                resampled_bot_line = resampled_bot_line[
                    head_shrink_num:len(resampled_bot_line) - tail_shrink_num]

            # One quadrilateral per pair of consecutive center points, with
            # the side lines pulled towards the center line.
            for i in range(0, len(center_line) - 1):
                tl = center_line[i] + (
                    resampled_top_line[i] - center_line[i]) * shrink_ratio
                tr = center_line[i + 1] + (
                    resampled_top_line[i + 1] -
                    center_line[i + 1]) * shrink_ratio
                br = center_line[i + 1] + (
                    resampled_bot_line[i + 1] -
                    center_line[i + 1]) * shrink_ratio
                bl = center_line[i] + (
                    resampled_bot_line[i] - center_line[i]) * shrink_ratio
                current_center_box = np.vstack([tl, tr, br,
                                                bl]).astype(np.int32)
                center_region_boxes.append(current_center_box)

        cv2.fillPoly(center_region_mask, center_region_boxes, 1)
        return center_region_mask

    def resample_polygon(self, polygon, n=400):
        """Resample one polygon with about n points on its boundary.

        Each edge receives a share of ``n`` proportional to its length,
        truncated to an integer, so the result can hold fewer than ``n``
        points.

        Args:
            polygon (ndarray): The input polygon, one (x, y) vertex per row.
            n (int): The number of resampled points.

        Returns:
            resampled_polygon (ndarray): The resampled polygon.
        """
        # Vertex i is joined to vertex i + 1; the last one wraps to the first.
        next_vertices = np.roll(polygon, -1, axis=0)
        length = [((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5
                  for p1, p2 in zip(polygon, next_vertices)]

        total_length = sum(length)
        n_on_each_line = (np.array(length) / (total_length + 1e-8)) * n
        n_on_each_line = n_on_each_line.astype(np.int32)
        new_polygon = []

        for p1, p2, num in zip(polygon, next_vertices, n_on_each_line):
            if num == 0:
                continue

            # ``num`` evenly spaced points starting at p1; p2 itself is
            # emitted as the start of the next edge.
            dxdy = (p2 - p1) / num
            for j in range(num):
                new_polygon.append(p1 + dxdy * j)

        return np.array(new_polygon)

    def normalize_polygon(self, polygon):
        """Rotate the point order of a polygon to a canonical start point.

        The polygon is centered on its mean; among the 8 points with the
        smallest absolute centered x, the one with the smallest y becomes
        the new start point. Coordinates are not changed.

        Args:
            polygon (ndarray): The origin polygon.

        Returns:
            new_polygon (ndarray): The same points, starting from the
                selected start point.
        """
        temp_polygon = polygon - polygon.mean(axis=0)
        x = np.abs(temp_polygon[:, 0])
        y = temp_polygon[:, 1]
        index_x = np.argsort(x)
        index_y = np.argmin(y[index_x[:8]])
        index = index_x[index_y]
        new_polygon = np.concatenate([polygon[index:], polygon[:index]])
        return new_polygon

    def poly2fourier(self, polygon, fourier_degree):
        """Perform Fourier transformation to generate Fourier coefficients ck
        from polygon.

        Args:
            polygon (ndarray): An input polygon.
            fourier_degree (int): The maximum Fourier degree K.

        Returns:
            c (ndarray(complex)): The 2K+1 Fourier coefficients, ordered
                from degree -K to degree K.
        """
        # Treat every (x, y) point as the complex number x + iy.
        points = polygon[:, 0] + polygon[:, 1] * 1j
        c_fft = fft(points) / len(points)
        # The negative degrees sit at the end of the FFT output.
        c = np.hstack((c_fft[-fourier_degree:], c_fft[:fourier_degree + 1]))
        return c

    def clockwise(self, c, fourier_degree):
        """Make sure the polygon reconstructed from Fourier coefficients c in
        the clockwise direction.

        The magnitudes of the degree +1 and -1 coefficients are compared;
        on a tie the degree +2 and -2 coefficients decide.

        Args:
            c (ndarray(complex)): The Fourier coefficients, ordered from
                degree -K to degree K.
            fourier_degree (int): The maximum Fourier degree K.

        Returns:
            c (ndarray(complex)): The coefficients, reversed when needed.
        """
        # NOTE: the tie-break reads degree +/-2, so it needs K >= 2.
        if np.abs(c[fourier_degree + 1]) > np.abs(c[fourier_degree - 1]):
            return c
        elif np.abs(c[fourier_degree + 1]) < np.abs(c[fourier_degree - 1]):
            return c[::-1]
        else:
            if np.abs(c[fourier_degree + 2]) > np.abs(c[fourier_degree - 2]):
                return c
            else:
                return c[::-1]

    def cal_fourier_signature(self, polygon, fourier_degree):
        """Calculate Fourier signature from input polygon.

        Args:
            polygon (ndarray): The input polygon.
            fourier_degree (int): The maximum Fourier degree K.

        Returns:
            fourier_signature (ndarray): An array shaped (2k+1, 2) containing
                real part and imaginary part of 2k+1 Fourier coefficients.
        """
        resampled_polygon = self.resample_polygon(polygon)
        resampled_polygon = self.normalize_polygon(resampled_polygon)

        fourier_coeff = self.poly2fourier(resampled_polygon, fourier_degree)
        fourier_coeff = self.clockwise(fourier_coeff, fourier_degree)

        real_part = np.real(fourier_coeff).reshape((-1, 1))
        image_part = np.imag(fourier_coeff).reshape((-1, 1))
        fourier_signature = np.hstack([real_part, image_part])

        return fourier_signature

    def generate_fourier_maps(self, img_size, text_polys):
        """Generate Fourier coefficient maps.

        Args:
            img_size (tuple): The image size of (height, width).
            text_polys (list[list[ndarray]]): The list of text polygons.

        Returns:
            fourier_real_map (ndarray): The Fourier coefficient real part
                maps, shaped (2k+1, height, width).
            fourier_image_map (ndarray): The Fourier coefficient imaginary
                part maps, shaped (2k+1, height, width).
        """

        assert isinstance(img_size, tuple)
        assert check_argument.is_2dlist(text_polys)

        h, w = img_size
        k = self.fourier_degree
        real_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)
        imag_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)

        for poly in text_polys:
            assert len(poly) == 1
            text_instance = [[poly[0][i], poly[0][i + 1]]
                             for i in range(0, len(poly[0]), 2)]
            mask = np.zeros((h, w), dtype=np.uint8)
            polygon = np.array(text_instance).reshape((1, -1, 2))
            cv2.fillPoly(mask, polygon.astype(np.int32), 1)
            fourier_coeff = self.cal_fourier_signature(polygon[0], k)
            for i in range(-k, k + 1):
                if i != 0:
                    # Overwrite the coefficient inside this polygon and keep
                    # whatever earlier polygons wrote outside of it.
                    real_map[i + k, :, :] = mask * fourier_coeff[i + k, 0] + (
                        1 - mask) * real_map[i + k, :, :]
                    imag_map[i + k, :, :] = mask * fourier_coeff[i + k, 1] + (
                        1 - mask) * imag_map[i + k, :, :]
                else:
                    # The degree-0 coefficient is stored relative to each
                    # pixel's own (x, y) position.
                    yx = np.argwhere(mask > 0.5)
                    k_ind = np.ones((len(yx)), dtype=np.int64) * k
                    y, x = yx[:, 0], yx[:, 1]
                    real_map[k_ind, y, x] = fourier_coeff[k, 0] - x
                    imag_map[k_ind, y, x] = fourier_coeff[k, 1] - y

        return real_map, imag_map

    def _group_polys_by_level(self, polys, img_h):
        """Assign polygons to levels and rescale them to each level's size.

        A polygon goes to every level whose proportion range strictly
        contains max(box height, box width) / img_h; its coordinates are
        divided by that level's size divisor.

        Args:
            polys (list[list[ndarray]]): The polygons to distribute.
            img_h (int): The image height.

        Returns:
            lv_polys (list[list[list[ndarray]]]): The polygons per level.
        """
        lv_polys = [[] for _ in self.level_size_divisors]
        for poly in polys:
            assert len(poly) == 1
            text_instance = [[poly[0][i], poly[0][i + 1]]
                             for i in range(0, len(poly[0]), 2)]
            # ``np.int`` was removed in NumPy 1.24; use an explicit 32-bit
            # integer type for cv2.boundingRect.
            polygon = np.array(
                text_instance, dtype=np.int32).reshape((1, -1, 2))
            _, _, box_w, box_h = cv2.boundingRect(polygon)
            proportion = max(box_h, box_w) / (img_h + 1e-8)

            for ind, proportion_range in enumerate(
                    self.level_proportion_range):
                if proportion_range[0] < proportion < proportion_range[1]:
                    lv_polys[ind].append(
                        [poly[0] / self.level_size_divisors[ind]])
        return lv_polys

    def generate_level_targets(self, img_size, text_polys, ignore_polys):
        """Generate ground truth target on each level.

        Args:
            img_size (list[int]): Shape of input image.
            text_polys (list[list[ndarray]]): A list of ground truth polygons.
            ignore_polys (list[list[ndarray]]): A list of ignored polygons.

        Returns:
            level_maps (list(ndarray)): A list of ground target on each level.
                Every entry stacks, in order, the text region mask, the
                center region mask, the effective mask, the Fourier real
                maps and the Fourier imaginary maps.
        """
        h, w = img_size
        lv_text_polys = self._group_polys_by_level(text_polys, h)
        lv_ignore_polys = self._group_polys_by_level(ignore_polys, h)
        level_maps = []

        for ind, size_divisor in enumerate(self.level_size_divisors):
            current_level_maps = []
            level_img_size = (h // size_divisor, w // size_divisor)

            # ``[None]`` adds a leading axis so that the masks can be
            # concatenated with the (2k+1, h, w) Fourier maps.
            text_region = self.generate_text_region_mask(
                level_img_size, lv_text_polys[ind])[None]
            current_level_maps.append(text_region)

            center_region = self.generate_center_region_mask(
                level_img_size, lv_text_polys[ind])[None]
            current_level_maps.append(center_region)

            effective_mask = self.generate_effective_mask(
                level_img_size, lv_ignore_polys[ind])[None]
            current_level_maps.append(effective_mask)

            fourier_real_map, fourier_image_maps = self.generate_fourier_maps(
                level_img_size, lv_text_polys[ind])
            current_level_maps.append(fourier_real_map)
            current_level_maps.append(fourier_image_maps)

            level_maps.append(np.concatenate(current_level_maps))

        return level_maps

    def generate_targets(self, results):
        """Generate the ground truth targets for FCENet.

        The first three level maps are stored under ``'p3_maps'``,
        ``'p4_maps'`` and ``'p5_maps'``.

        Args:
            results (dict): The input result dictionary.

        Returns:
            results (dict): The output result dictionary.
        """

        assert isinstance(results, dict)

        polygon_masks = results['gt_masks'].masks
        polygon_masks_ignore = results['gt_masks_ignore'].masks

        h, w, _ = results['img_shape']

        level_maps = self.generate_level_targets((h, w), polygon_masks,
                                                 polygon_masks_ignore)

        results['mask_fields'].clear()  # rm gt_masks encoded by polygons
        mapping = {
            'p3_maps': level_maps[0],
            'p4_maps': level_maps[1],
            'p5_maps': level_maps[2]
        }
        for key, value in mapping.items():
            results[key] = value

        return results
|
@ -1,65 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from mmdet.core import BitmapMasks
|
||||
|
||||
from mmocr.registry import TRANSFORMS
|
||||
from . import BaseTextDetTargets
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class PANetTargets(BaseTextDetTargets):
    """Generate the ground truths for PANet: Efficient and Accurate Arbitrary-
    Shaped Text Detection with Pixel Aggregation Network.

    [https://arxiv.org/abs/1908.05900]. This code is partially adapted from
    https://github.com/WenmuZhou/PAN.pytorch.

    Args:
        shrink_ratio (tuple[float]): The ratios for shrinking text instances.
        max_shrink (int): The maximum shrink distance.
    """

    def __init__(self, shrink_ratio=(1.0, 0.5), max_shrink=20):
        # Run the base-class initialiser before setting our own attributes.
        super().__init__()
        self.shrink_ratio = shrink_ratio
        self.max_shrink = max_shrink

    def generate_targets(self, results):
        """Generate the gt targets for PANet.

        One kernel mask is generated per entry of ``shrink_ratio``. The
        kernels and the effective mask are stored as ``BitmapMasks`` under
        ``'gt_kernels'`` and ``'gt_mask'`` and registered in
        ``results['mask_fields']``; the polygon, label and bbox annotations
        are removed from ``results``.

        Args:
            results (dict): The input result dictionary.

        Returns:
            results (dict): The output result dictionary.
        """

        assert isinstance(results, dict)

        polygon_masks = results['gt_masks'].masks
        polygon_masks_ignore = results['gt_masks_ignore'].masks

        h, w, _ = results['img_shape']
        gt_kernels = []
        for ratio in self.shrink_ratio:
            # Only the mask is used; the second return value is discarded.
            mask, _ = self.generate_kernels((h, w),
                                            polygon_masks,
                                            ratio,
                                            max_shrink=self.max_shrink,
                                            ignore_tags=None)
            gt_kernels.append(mask)
        gt_mask = self.generate_effective_mask((h, w), polygon_masks_ignore)

        results['mask_fields'].clear()  # rm gt_masks encoded by polygons
        if 'bbox_fields' in results:
            results['bbox_fields'].clear()
        results.pop('gt_labels', None)
        results.pop('gt_masks', None)
        results.pop('gt_bboxes', None)
        results.pop('gt_bboxes_ignore', None)

        mapping = {'gt_kernels': gt_kernels, 'gt_mask': gt_mask}
        for key, value in mapping.items():
            # gt_kernels is already a list; the single gt_mask is wrapped.
            value = value if isinstance(value, list) else [value]
            results[key] = BitmapMasks(value, h, w)
            results['mask_fields'].append(key)

        return results
|
@ -1,22 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from mmocr.registry import TRANSFORMS
|
||||
from . import PANetTargets
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class PSENetTargets(PANetTargets):
    """Ground truth target generator of PSENet: Shape robust text detection
    with progressive scale expansion network.

    [https://arxiv.org/abs/1903.12473]. This code is partially adapted from
    https://github.com/whai362/PSENet.

    It only changes the default ``shrink_ratio`` of ``PANetTargets``; both
    arguments are forwarded to the parent unchanged.

    Args:
        shrink_ratio(tuple(float)): The ratios for shrinking text instances.
        max_shrink(int): The maximum shrinking distance.
    """

    def __init__(self,
                 shrink_ratio=(1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4),
                 max_shrink=20):
        parent_kwargs = dict(shrink_ratio=shrink_ratio, max_shrink=max_shrink)
        super().__init__(**parent_kwargs)
|
@ -1,496 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import cv2
|
||||
import numpy as np
|
||||
from mmdet.core import BitmapMasks
|
||||
from numpy.linalg import norm
|
||||
|
||||
import mmocr.utils.check_argument as check_argument
|
||||
from mmocr.registry import TRANSFORMS
|
||||
from . import BaseTextDetTargets
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
|
||||
class TextSnakeTargets(BaseTextDetTargets):
|
||||
"""Generate the ground truth targets of TextSnake: TextSnake: A Flexible
|
||||
Representation for Detecting Text of Arbitrary Shapes.
|
||||
|
||||
[https://arxiv.org/abs/1807.01544]. This was partially adapted from
|
||||
https://github.com/princewang1994/TextSnake.pytorch.
|
||||
|
||||
Args:
|
||||
orientation_thr (float): The threshold for distinguishing between
|
||||
head edge and tail edge among the horizontal and vertical edges
|
||||
of a quadrangle.
|
||||
"""
|
||||
|
||||
def __init__(self,
             orientation_thr=2.0,
             resample_step=4.0,
             center_region_shrink_ratio=0.3):
    """Store the hyper-parameters used when building TextSnake targets.

    Args:
        orientation_thr (float): Threshold forwarded to
            ``find_head_tail`` to decide which pair of quadrangle edges
            is the head/tail pair. Must be a ``float``.
        resample_step (float): Step size forwarded to
            ``resample_sidelines``. Must be a ``float``.
        center_region_shrink_ratio (float): Shrink ratio forwarded to
            ``draw_center_region_maps``. Must be a ``float``.
    """

    super().__init__()
    # Small constant added to denominators to avoid division by zero.
    self.eps = 1e-8
    self.center_region_shrink_ratio = center_region_shrink_ratio
    self.resample_step = resample_step
    self.orientation_thr = orientation_thr
|
||||
|
||||
def vector_angle(self, vec1, vec2):
|
||||
if vec1.ndim > 1:
|
||||
unit_vec1 = vec1 / (norm(vec1, axis=-1) + self.eps).reshape(
|
||||
(-1, 1))
|
||||
else:
|
||||
unit_vec1 = vec1 / (norm(vec1, axis=-1) + self.eps)
|
||||
if vec2.ndim > 1:
|
||||
unit_vec2 = vec2 / (norm(vec2, axis=-1) + self.eps).reshape(
|
||||
(-1, 1))
|
||||
else:
|
||||
unit_vec2 = vec2 / (norm(vec2, axis=-1) + self.eps)
|
||||
return np.arccos(
|
||||
np.clip(np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))
|
||||
|
||||
def vector_slope(self, vec):
|
||||
assert len(vec) == 2
|
||||
return abs(vec[1] / (vec[0] + self.eps))
|
||||
|
||||
def vector_sin(self, vec):
|
||||
assert len(vec) == 2
|
||||
return vec[1] / (norm(vec) + self.eps)
|
||||
|
||||
def vector_cos(self, vec):
|
||||
assert len(vec) == 2
|
||||
return vec[0] / (norm(vec) + self.eps)
|
||||
|
||||
def find_head_tail(self, points, orientation_thr):
    """Find the head edge and tail edge of a text polygon.

    Polygons with more than four points are handled by scoring every edge
    and picking the best-scoring (head, tail) pair; quadrangles are
    handled by comparing edge slopes and edge lengths.

    Args:
        points (ndarray): The points composing a text polygon, of shape
            (N, 2) with N >= 4.
        orientation_thr (float): The threshold for distinguishing between
            head edge and tail edge among the horizontal and vertical edges
            of a quadrangle.

    Returns:
        head_inds (list): The indexes of two points composing head edge.
        tail_inds (list): The indexes of two points composing tail edge.
    """

    assert points.ndim == 2
    assert points.shape[0] >= 4
    assert points.shape[1] == 2
    assert isinstance(orientation_thr, float)

    if len(points) > 4:
        # Close the polygon so that edge i runs from point i to point i+1
        # (the last edge wraps around to point 0).
        pad_points = np.vstack([points, points[0]])
        edge_vec = pad_points[1:] - pad_points[:-1]

        theta_sum = []
        adjacent_vec_theta = []
        for i, edge_vec1 in enumerate(edge_vec):
            # Indexes of the previous and the next edge (cyclic).
            adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
            adjacent_edge_vec = edge_vec[adjacent_ind]
            # Sum of the angles between this edge and both neighbours.
            temp_theta_sum = np.sum(
                self.vector_angle(edge_vec1, adjacent_edge_vec))
            # Angle between the two neighbouring edges themselves.
            temp_adjacent_theta = self.vector_angle(
                adjacent_edge_vec[0], adjacent_edge_vec[1])
            theta_sum.append(temp_theta_sum)
            adjacent_vec_theta.append(temp_adjacent_theta)
        theta_sum_score = np.array(theta_sum) / np.pi
        adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
        poly_center = np.mean(points, axis=0)
        # For each edge, the larger of its two endpoint distances to the
        # polygon's mean point, normalised by the largest such distance.
        edge_dist = np.maximum(
            norm(pad_points[1:] - poly_center, axis=-1),
            norm(pad_points[:-1] - poly_center, axis=-1))
        dist_score = edge_dist / (np.max(edge_dist) + self.eps)
        position_score = np.zeros(len(edge_vec))
        # Per-edge score: weighted mix of the angle and distance terms.
        score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
        score += 0.35 * dist_score
        if len(points) % 2 == 0:
            # For an even number of points, give a bonus to the middle
            # edge and to the last (closing) edge.
            position_score[(len(score) // 2 - 1)] += 1
            position_score[-1] += 1
        score += 0.1 * position_score
        # Duplicate the scores so cyclic windows can be taken by slicing.
        pad_score = np.concatenate([score, score])
        # score_matrix[i, j] rates the pair (edge i, edge i + j + 2):
        # edges that are not adjacent to edge i.
        score_matrix = np.zeros((len(score), len(score) - 3))
        x = np.arange(len(score) - 3) / float(len(score) - 4)
        # Gaussian weights centred on 0.5 and rescaled so the peak is 1.
        gaussian = 1. / (np.sqrt(2. * np.pi) * 0.5) * np.exp(-np.power(
            (x - 0.5) / 0.5, 2.) / 2)
        gaussian = gaussian / np.max(gaussian)
        for i in range(len(score)):
            score_matrix[i, :] = score[i] + pad_score[
                (i + 2):(i + len(score) - 1)] * gaussian * 0.3

        # Best pair: row index is the head edge, column index is the
        # offset (minus 2) of the tail edge from the head edge.
        head_start, tail_increment = np.unravel_index(
            score_matrix.argmax(), score_matrix.shape)
        tail_start = (head_start + tail_increment + 2) % len(points)
        head_end = (head_start + 1) % len(points)
        tail_end = (tail_start + 1) % len(points)

        # Swap the roles so that the head edge ends at the smaller index.
        if head_end > tail_end:
            head_start, tail_start = tail_start, head_start
            head_end, tail_end = tail_end, head_end
        head_inds = [head_start, head_end]
        tail_inds = [tail_start, tail_end]
    else:
        # Quadrangle: the edge pair with the smaller summed slope is
        # labelled "horizontal", the other pair "vertical".
        if self.vector_slope(points[1] - points[0]) + self.vector_slope(
                points[3] - points[2]) < self.vector_slope(
                    points[2] - points[1]) + self.vector_slope(points[0] -
                                                               points[3]):
            horizontal_edge_inds = [[0, 1], [2, 3]]
            vertical_edge_inds = [[3, 0], [1, 2]]
        else:
            horizontal_edge_inds = [[3, 0], [1, 2]]
            vertical_edge_inds = [[0, 1], [2, 3]]

        vertical_len_sum = norm(points[vertical_edge_inds[0][0]] -
                                points[vertical_edge_inds[0][1]]) + norm(
                                    points[vertical_edge_inds[1][0]] -
                                    points[vertical_edge_inds[1][1]])
        horizontal_len_sum = norm(
            points[horizontal_edge_inds[0][0]] -
            points[horizontal_edge_inds[0][1]]) + norm(
                points[horizontal_edge_inds[1][0]] -
                points[horizontal_edge_inds[1][1]])

        # The horizontal edges become head/tail only when the vertical
        # edges are more than ``orientation_thr`` times longer; otherwise
        # the vertical edges are used.
        if vertical_len_sum > horizontal_len_sum * orientation_thr:
            head_inds = horizontal_edge_inds[0]
            tail_inds = horizontal_edge_inds[1]
        else:
            head_inds = vertical_edge_inds[0]
            tail_inds = vertical_edge_inds[1]

    return head_inds, tail_inds
|
||||
|
||||
def reorder_poly_edge(self, points):
|
||||
"""Get the respective points composing head edge, tail edge, top
|
||||
sideline and bottom sideline.
|
||||
|
||||
Args:
|
||||
points (ndarray): The points composing a text polygon.
|
||||
|
||||
Returns:
|
||||
head_edge (ndarray): The two points composing the head edge of text
|
||||
polygon.
|
||||
tail_edge (ndarray): The two points composing the tail edge of text
|
||||
polygon.
|
||||
top_sideline (ndarray): The points composing top curved sideline of
|
||||
text polygon.
|
||||
bot_sideline (ndarray): The points composing bottom curved sideline
|
||||
of text polygon.
|
||||
"""
|
||||
|
||||
assert points.ndim == 2
|
||||
assert points.shape[0] >= 4
|
||||
assert points.shape[1] == 2
|
||||
|
||||
head_inds, tail_inds = self.find_head_tail(points,
|
||||
self.orientation_thr)
|
||||
head_edge, tail_edge = points[head_inds], points[tail_inds]
|
||||
|
||||
pad_points = np.vstack([points, points])
|
||||
if tail_inds[1] < 1:
|
||||
tail_inds[1] = len(points)
|
||||
sideline1 = pad_points[head_inds[1]:tail_inds[1]]
|
||||
sideline2 = pad_points[tail_inds[1]:(head_inds[1] + len(points))]
|
||||
sideline_mean_shift = np.mean(
|
||||
sideline1, axis=0) - np.mean(
|
||||
sideline2, axis=0)
|
||||
|
||||
if sideline_mean_shift[1] > 0:
|
||||
top_sideline, bot_sideline = sideline2, sideline1
|
||||
else:
|
||||
top_sideline, bot_sideline = sideline1, sideline2
|
||||
|
||||
return head_edge, tail_edge, top_sideline, bot_sideline
|
||||
|
||||
def cal_curve_length(self, line):
    """Measure every segment of a discrete curve and the whole curve.

    Only the first two columns of ``line`` (x and y) are used.

    Args:
        line (ndarray): The points composing a discrete curve; a 2-D
            array with at least two rows.

    Returns:
        tuple: Returns (edges_length, total_length).

            - | edge_length (ndarray): The length of each edge on the
                discrete curve.
            - | total_length (float): The total length of the discrete
                curve.
    """

    assert line.ndim == 2
    assert len(line) >= 2

    # Coordinate differences between consecutive points.
    step = np.diff(line, axis=0)
    delta_x, delta_y = step[:, 0], step[:, 1]
    edges_length = np.sqrt(delta_x**2 + delta_y**2)
    return edges_length, np.sum(edges_length)
|
||||
|
||||
def resample_line(self, line, n):
|
||||
"""Resample n points on a line.
|
||||
|
||||
Args:
|
||||
line (ndarray): The points composing a line.
|
||||
n (int): The resampled points number.
|
||||
|
||||
Returns:
|
||||
resampled_line (ndarray): The points composing the resampled line.
|
||||
"""
|
||||
|
||||
assert line.ndim == 2
|
||||
assert line.shape[0] >= 2
|
||||
assert line.shape[1] == 2
|
||||
assert isinstance(n, int)
|
||||
assert n > 2
|
||||
|
||||
edges_length, total_length = self.cal_curve_length(line)
|
||||
t_org = np.insert(np.cumsum(edges_length), 0, 0)
|
||||
unit_t = total_length / (n - 1)
|
||||
t_equidistant = np.arange(1, n - 1, dtype=np.float32) * unit_t
|
||||
edge_ind = 0
|
||||
points = [line[0]]
|
||||
for t in t_equidistant:
|
||||
while edge_ind < len(edges_length) - 1 and t > t_org[edge_ind + 1]:
|
||||
edge_ind += 1
|
||||
t_l, t_r = t_org[edge_ind], t_org[edge_ind + 1]
|
||||
weight = np.array([t_r - t, t - t_l], dtype=np.float32) / (
|
||||
t_r - t_l + self.eps)
|
||||
p_coords = np.dot(weight, line[[edge_ind, edge_ind + 1]])
|
||||
points.append(p_coords)
|
||||
points.append(line[-1])
|
||||
resampled_line = np.vstack(points)
|
||||
|
||||
return resampled_line
|
||||
|
||||
def resample_sidelines(self, sideline1, sideline2, resample_step):
|
||||
"""Resample two sidelines to be of the same points number according to
|
||||
step size.
|
||||
|
||||
Args:
|
||||
sideline1 (ndarray): The points composing a sideline of a text
|
||||
polygon.
|
||||
sideline2 (ndarray): The points composing another sideline of a
|
||||
text polygon.
|
||||
resample_step (float): The resampled step size.
|
||||
|
||||
Returns:
|
||||
resampled_line1 (ndarray): The resampled line 1.
|
||||
resampled_line2 (ndarray): The resampled line 2.
|
||||
"""
|
||||
|
||||
assert sideline1.ndim == sideline2.ndim == 2
|
||||
assert sideline1.shape[1] == sideline2.shape[1] == 2
|
||||
assert sideline1.shape[0] >= 2
|
||||
assert sideline2.shape[0] >= 2
|
||||
assert isinstance(resample_step, float)
|
||||
|
||||
_, length1 = self.cal_curve_length(sideline1)
|
||||
_, length2 = self.cal_curve_length(sideline2)
|
||||
|
||||
avg_length = (length1 + length2) / 2
|
||||
resample_point_num = max(int(float(avg_length) / resample_step) + 1, 3)
|
||||
|
||||
resampled_line1 = self.resample_line(sideline1, resample_point_num)
|
||||
resampled_line2 = self.resample_line(sideline2, resample_point_num)
|
||||
|
||||
return resampled_line1, resampled_line2
|
||||
|
||||
def draw_center_region_maps(self, top_line, bot_line, center_line,
                            center_region_mask, radius_map, sin_map,
                            cos_map, region_shrink_ratio):
    """Draw attributes on text center region.

    For every pair of consecutive center-line points a quadrangle is
    built by moving from the center line towards the top and bottom
    sidelines by ``region_shrink_ratio``, and that quadrangle is filled
    in place on all four maps.

    Args:
        top_line (ndarray): The points composing top curved sideline of
            text polygon.
        bot_line (ndarray): The points composing bottom curved sideline
            of text polygon.
        center_line (ndarray): The points composing the center line of text
            instance.
        center_region_mask (ndarray): The text center region mask; filled
            with 1.
        radius_map (ndarray): The map filled with half the distance
            between the mid points of the top and bottom segments.
        sin_map (ndarray): The map filled with ``vector_sin`` of the
            direction between the two consecutive center-line points.
        cos_map (ndarray): The map filled with ``vector_cos`` of the
            direction between the two consecutive center-line points.
        region_shrink_ratio (float): The shrink ratio of text center.
    """

    assert top_line.shape == bot_line.shape == center_line.shape
    assert (center_region_mask.shape == radius_map.shape == sin_map.shape
            == cos_map.shape)
    assert isinstance(region_shrink_ratio, float)

    def _pull_in(center_pt, border_pt):
        # Point on the way from the center line to a sideline point.
        return center_pt + (border_pt - center_pt) * region_shrink_ratio

    for start in range(len(center_line) - 1):
        end = start + 1

        top_mid = (top_line[start] + top_line[end]) / 2
        bot_mid = (bot_line[start] + bot_line[end]) / 2
        half_width = norm(top_mid - bot_mid) / 2

        direction = center_line[end] - center_line[start]
        sin_value = self.vector_sin(direction)
        cos_value = self.vector_cos(direction)

        # Corners in the order top-left, top-right, bottom-right,
        # bottom-left.
        corners = [
            _pull_in(center_line[start], top_line[start]),
            _pull_in(center_line[end], top_line[end]),
            _pull_in(center_line[end], bot_line[end]),
            _pull_in(center_line[start], bot_line[start]),
        ]
        quad = np.vstack(corners).astype(np.int32)

        fills = (
            (center_region_mask, 1),
            (sin_map, sin_value),
            (cos_map, cos_value),
            (radius_map, half_width),
        )
        for canvas, fill_value in fills:
            cv2.fillPoly(canvas, [quad], color=fill_value)
|
||||
|
||||
def generate_center_mask_attrib_maps(self, img_size, text_polys):
    """Generate text center region mask and geometric attribute maps.

    For every polygon the sidelines are extracted, resampled to the same
    number of points, averaged into a center line, trimmed at both ends
    and finally drawn with ``draw_center_region_maps``.

    Args:
        img_size (tuple): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Each
            entry must hold exactly one flat coordinate sequence
            ``[x0, y0, x1, y1, ...]``.

    Returns:
        center_region_mask (ndarray): The text center region mask.
        radius_map (ndarray): The map holding, for each drawn center
            region segment, half the distance between the top and bottom
            sidelines.
        sin_map (ndarray): The map holding the sine of the center line
            direction (as computed by ``vector_sin``).
        cos_map (ndarray): The map holding the cosine of the center line
            direction (as computed by ``vector_cos``).
    """

    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    h, w = img_size

    center_region_mask = np.zeros((h, w), np.uint8)
    radius_map = np.zeros((h, w), dtype=np.float32)
    sin_map = np.zeros((h, w), dtype=np.float32)
    cos_map = np.zeros((h, w), dtype=np.float32)

    for poly in text_polys:
        assert len(poly) == 1
        # Turn the flat [x0, y0, x1, y1, ...] sequence into (N, 2) points.
        text_instance = [[poly[0][i], poly[0][i + 1]]
                         for i in range(0, len(poly[0]), 2)]
        polygon_points = np.array(text_instance).reshape(-1, 2)

        # Drop points that (almost) coincide with their successor.
        n = len(polygon_points)
        keep_inds = []
        for i in range(n):
            if norm(polygon_points[i] -
                    polygon_points[(i + 1) % n]) > 1e-5:
                keep_inds.append(i)
        polygon_points = polygon_points[keep_inds]

        _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
        resampled_top_line, resampled_bot_line = self.resample_sidelines(
            top_line, bot_line, self.resample_step)
        # Reverse the bottom line so that index i of both lines can be
        # paired when averaging into the center line.
        resampled_bot_line = resampled_bot_line[::-1]
        center_line = (resampled_top_line + resampled_bot_line) / 2

        # Normalise the line direction: steep center lines (slope > 0.9)
        # are made to run towards increasing y, the others towards
        # increasing x.
        if self.vector_slope(center_line[-1] - center_line[0]) > 0.9:
            if (center_line[-1] - center_line[0])[1] < 0:
                center_line = center_line[::-1]
                resampled_top_line = resampled_top_line[::-1]
                resampled_bot_line = resampled_bot_line[::-1]
        else:
            if (center_line[-1] - center_line[0])[0] < 0:
                center_line = center_line[::-1]
                resampled_top_line = resampled_top_line[::-1]
                resampled_bot_line = resampled_bot_line[::-1]

        # Number of points to trim at each end: a quarter of the local
        # text height, expressed in resample steps.
        line_head_shrink_len = norm(resampled_top_line[0] -
                                    resampled_bot_line[0]) / 4.0
        line_tail_shrink_len = norm(resampled_top_line[-1] -
                                    resampled_bot_line[-1]) / 4.0
        head_shrink_num = int(line_head_shrink_len // self.resample_step)
        tail_shrink_num = int(line_tail_shrink_len // self.resample_step)

        # Trim only if more than two points remain afterwards.
        if len(center_line) > head_shrink_num + tail_shrink_num + 2:
            center_line = center_line[head_shrink_num:len(center_line) -
                                      tail_shrink_num]
            resampled_top_line = resampled_top_line[
                head_shrink_num:len(resampled_top_line) - tail_shrink_num]
            resampled_bot_line = resampled_bot_line[
                head_shrink_num:len(resampled_bot_line) - tail_shrink_num]

        # The maps are updated in place.
        self.draw_center_region_maps(resampled_top_line,
                                     resampled_bot_line, center_line,
                                     center_region_mask, radius_map,
                                     sin_map, cos_map,
                                     self.center_region_shrink_ratio)

    return center_region_mask, radius_map, sin_map, cos_map
|
||||
|
||||
def generate_text_region_mask(self, img_size, text_polys):
    """Generate the text region mask by filling every text polygon.

    Args:
        img_size (tuple): The image size (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Each
            entry must hold exactly one flat coordinate sequence
            ``[x0, y0, x1, y1, ...]``.

    Returns:
        text_region_mask (ndarray): The text region mask, a ``uint8``
            array that is 1 inside the polygons and 0 elsewhere.
    """

    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    text_region_mask = np.zeros((height, width), dtype=np.uint8)

    for poly in text_polys:
        assert len(poly) == 1
        flat = poly[0]
        # Pair up consecutive values into (x, y) points.
        pairs = [[flat[k], flat[k + 1]] for k in range(0, len(flat), 2)]
        contour = np.array(pairs, dtype=np.int32).reshape((1, -1, 2))
        cv2.fillPoly(text_region_mask, contour, 1)

    return text_region_mask
|
||||
|
||||
def generate_targets(self, results):
    """Generate the gt targets for TextSnake.

    The polygon masks stored under ``gt_masks`` and ``gt_masks_ignore``
    are turned into six bitmap targets which are written back to
    ``results`` and registered in ``results['mask_fields']``.

    Args:
        results (dict): The input result dictionary.

    Returns:
        results (dict): The output result dictionary.
    """

    assert isinstance(results, dict)

    text_polys = results['gt_masks'].masks
    ignored_polys = results['gt_masks_ignore'].masks
    height, width, _ = results['img_shape']
    img_size = (height, width)

    text_mask = self.generate_text_region_mask(img_size, text_polys)
    effective_mask = self.generate_effective_mask(img_size, ignored_polys)
    center_mask, radius_map, sin_map, cos_map = (
        self.generate_center_mask_attrib_maps(img_size, text_polys))

    results['mask_fields'].clear()  # rm gt_masks encoded by polygons
    targets = (
        ('gt_text_mask', text_mask),
        ('gt_center_region_mask', center_mask),
        ('gt_mask', effective_mask),
        ('gt_radius_map', radius_map),
        ('gt_sin_map', sin_map),
        ('gt_cos_map', cos_map),
    )
    for field, target in targets:
        # BitmapMasks is given a list of maps.
        maps = target if isinstance(target, list) else [target]
        results[field] = BitmapMasks(maps, height, width)
        results['mask_fields'].append(field)

    return results
|
@ -1,97 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from mmdet.datasets.pipelines.transforms import Resize
|
||||
|
||||
from mmocr.registry import TRANSFORMS
|
||||
from mmocr.utils import check_argument
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class ScaleAspectJitter(Resize):
    """Resize image and segmentation mask encoded by coordinates.

    Allowed resize types are `around_min_img_scale`, `long_short_bound`, and
    `indep_sample_in_range`.

    Args:
        img_scale, multiscale_mode, ratio_range, keep_ratio: Forwarded to
            ``Resize``. ``keep_ratio`` must be falsy; ``ratio_range`` is
            also the range the random scale factor is sampled from.
        resize_type (str): One of the three resize types listed above.
        aspect_ratio_range (tuple | None): Range the aspect ratio jitter is
            sampled from. Required unless ``resize_type`` is
            `indep_sample_in_range`.
        long_size_bound (int | None): Upper bound of the long image side,
            required for `long_short_bound`.
        short_size_bound (int | None): Lower bound of the short image side,
            required for `long_short_bound`.
        scale_range (tuple | None): Range width and height are sampled from
            independently, required for `indep_sample_in_range`.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=False,
                 resize_type='around_min_img_scale',
                 aspect_ratio_range=None,
                 long_size_bound=None,
                 short_size_bound=None,
                 scale_range=None):
        super().__init__(
            img_scale=img_scale,
            multiscale_mode=multiscale_mode,
            ratio_range=ratio_range,
            keep_ratio=keep_ratio)
        assert not keep_ratio
        supported_types = ('around_min_img_scale', 'long_short_bound',
                           'indep_sample_in_range')
        assert resize_type in supported_types
        self.resize_type = resize_type

        if resize_type == 'indep_sample_in_range':
            # Only ``scale_range`` is used in this mode.
            for unused in (ratio_range, aspect_ratio_range,
                           short_size_bound, long_size_bound):
                assert unused is None
            assert scale_range is not None
        else:
            assert scale_range is None
            assert isinstance(ratio_range, tuple)
            assert isinstance(aspect_ratio_range, tuple)
            assert check_argument.equal_len(ratio_range, aspect_ratio_range)

            if resize_type == 'long_short_bound':
                assert short_size_bound is not None
                assert long_size_bound is not None

        self.scale_range = scale_range
        self.short_size_bound = short_size_bound
        self.long_size_bound = long_size_bound
        self.aspect_ratio_range = aspect_ratio_range

    @staticmethod
    def sample_from_range(range):
        """Draw a uniform random value between the two entries of ``range``.

        Args:
            range (sequence): Two numbers, in any order.

        Returns:
            float: A random value in ``[min(range), max(range))``.
        """
        assert len(range) == 2
        low, high = min(range), max(range)
        return np.random.random_sample() * (high - low) + low

    def _random_scale(self, results):
        """Sample the target size and store it in ``results``.

        Writes ``results['scale']`` as ``(w, h)`` and sets
        ``results['scale_idx']`` to ``None``.

        Args:
            results (dict): Result dict; ``results['img']`` is read unless
                the resize type is `indep_sample_in_range`.
        """
        mode = self.resize_type

        if mode == 'indep_sample_in_range':
            # Width and height are drawn independently of the image size.
            new_w = self.sample_from_range(self.scale_range)
            new_h = self.sample_from_range(self.scale_range)
            results['scale'] = (int(new_w), int(new_h))  # (w,h)
            results['scale_idx'] = None
            return

        img_h, img_w = results['img'].shape[0:2]
        short_side = min(img_h, img_w)
        if mode == 'long_short_bound':
            long_side = max(img_h, img_w)
            # First bring the long side down to the bound if needed.
            bound_scale = 1
            if long_side > self.long_size_bound:
                bound_scale = self.long_size_bound / long_side
            jitter = self.sample_from_range(self.ratio_range)
            scale = bound_scale * jitter
            # Keep the short side above its lower bound.
            if short_side * scale <= self.short_size_bound:
                scale = (self.short_size_bound + 10) * 1.0 / short_side
        elif mode == 'around_min_img_scale':
            target_short = min(self.img_scale[0])
            jitter = self.sample_from_range(self.ratio_range)
            scale = (jitter * target_short) / short_side
        else:
            raise NotImplementedError

        aspect = self.sample_from_range(self.aspect_ratio_range)
        # Height grows and width shrinks by sqrt(aspect), so the ratio
        # changes while the area factor stays scale ** 2.
        height_scale = scale * math.sqrt(aspect)
        width_scale = scale / math.sqrt(aspect)
        new_size = (int(img_w * width_scale), int(img_h * height_scale))
        results['scale'] = new_size  # (w,h)
        results['scale_idx'] = None
|
@ -1,152 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
from mmcv.utils import print_log
|
||||
from mmdet.datasets import ConcatDataset
|
||||
|
||||
from mmocr.registry import DATASETS
|
||||
from mmocr.utils import is_2dlist, is_type_list
|
||||
|
||||
|
||||
@DATASETS.register_module()
class UniformConcatDataset(ConcatDataset):
    """A wrapper of ConcatDataset which support dataset pipeline assignment and
    replacement.

    Args:
        datasets (list[dict] | list[list[dict]]): A list of datasets cfgs.
        separate_eval (bool): Whether to evaluate the results
            separately if it is used as validation dataset.
            Defaults to True. ``False`` is not supported and raises
            ``NotImplementedError``.
        show_mean_scores (str | bool): Whether to compute the mean evaluation
            results, only applicable when ``separate_eval=True``. Options are
            [True, False, ``auto``]. If ``True``, mean results will be added to
            the result dictionary with keys in the form of
            ``mean_{metric_name}``. If 'auto', mean results will be shown only
            when more than 1 dataset is wrapped.
        pipeline (None | list[dict] | list[list[dict]]): If ``None``,
            each dataset in datasets use its own pipeline;
            If ``list[dict]``, it will be assigned to the dataset whose
            pipeline is None in datasets;
            If ``list[list[dict]]``, pipeline of dataset which is None
            in datasets will be replaced by the corresponding pipeline
            in the list.
        force_apply (bool): If True, apply pipeline above to each dataset
            even if it have its own pipeline. Default: False.
        **kwargs: Default arguments used when building every dataset.
    """

    def __init__(self,
                 datasets,
                 separate_eval=True,
                 show_mean_scores='auto',
                 pipeline=None,
                 force_apply=False,
                 **kwargs):
        new_datasets = []
        if pipeline is not None:
            assert isinstance(
                pipeline,
                list), 'pipeline must be list[dict] or list[list[dict]].'
            if is_type_list(pipeline, dict):
                self._apply_pipeline(datasets, pipeline, force_apply)
                new_datasets = datasets
            elif is_2dlist(pipeline):
                assert is_2dlist(datasets)
                assert len(datasets) == len(pipeline)
                for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                    self._apply_pipeline(sub_datasets, tmp_pipeline,
                                         force_apply)
                    new_datasets.extend(sub_datasets)
        else:
            if is_2dlist(datasets):
                # Flatten the nested list of dataset cfgs.
                for sub_datasets in datasets:
                    new_datasets.extend(sub_datasets)
            else:
                new_datasets = datasets
        # ``kwargs`` must be passed by keyword: the registry appends
        # ``registry=self`` itself, so a second positional argument would
        # collide with it.
        datasets = [
            DATASETS.build(c, default_args=kwargs) for c in new_datasets
        ]
        super().__init__(datasets, separate_eval)

        if not separate_eval:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')

        assert isinstance(show_mean_scores, bool) or show_mean_scores == 'auto'
        if show_mean_scores == 'auto':
            show_mean_scores = len(self.datasets) > 1
        self.show_mean_scores = show_mean_scores
        # ``show_mean_scores`` is a plain bool from here on.
        if show_mean_scores:
            if len({type(ds) for ds in self.datasets}) != 1:
                raise NotImplementedError(
                    'To compute mean evaluation scores, all datasets '
                    'must have the same type')

    @staticmethod
    def _apply_pipeline(datasets, pipeline, force_apply=False):
        """Assign a deep copy of ``pipeline`` to dataset cfgs, in place.

        Args:
            datasets (list[dict]): Dataset config dicts to update.
            pipeline (list[dict]): The pipeline to assign.
            force_apply (bool): If True, overwrite existing pipelines too;
                otherwise only cfgs whose pipeline is missing or ``None``
                are updated.
        """
        from_cfg = all(isinstance(x, dict) for x in datasets)
        assert from_cfg, 'datasets should be config dicts'
        assert all(isinstance(x, dict) for x in pipeline)
        for dataset in datasets:
            # A cfg without a 'pipeline' key is treated like pipeline=None.
            if dataset.get('pipeline') is None or force_apply:
                dataset['pipeline'] = copy.deepcopy(pipeline)

    def evaluate(self, results, logger=None, **kwargs):
        """Evaluate the results.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            **kwargs: Forwarded to the ``evaluate`` method of every wrapped
                dataset.

        Returns:
            dict[str: float]: Results of each separate
            dataset if `self.separate_eval=True`. Keys have the form
            ``{dataset_idx}_{metric_name}``, plus ``mean_{metric_name}``
            when ``self.show_mean_scores`` is set.

        Raises:
            NotImplementedError: If ``self.separate_eval`` is False.
        """
        assert len(results) == self.cumulative_sizes[-1], \
            ('Dataset and results have different sizes: '
             f'{self.cumulative_sizes[-1]} v.s. {len(results)}')

        # Check whether all the datasets support evaluation
        for dataset in self.datasets:
            assert hasattr(dataset, 'evaluate'), \
                f'{type(dataset)} does not implement evaluate function'

        if not self.separate_eval:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')

        total_eval_results = dict()
        mean_eval_results = defaultdict(list)

        # ``cumulative_sizes[i]`` is the end offset of dataset i within
        # ``results``; the start offset is the previous end offset.
        start_idx = 0
        for dataset_idx, (dataset, end_idx) in enumerate(
                zip(self.datasets, self.cumulative_sizes)):
            results_per_dataset = results[start_idx:end_idx]
            start_idx = end_idx
            print_log(
                f'\nEvaluating {dataset.ann_file} with '
                f'{len(results_per_dataset)} images now',
                logger=logger)

            eval_results_per_dataset = dataset.evaluate(
                results_per_dataset, logger=logger, **kwargs)
            for k, v in eval_results_per_dataset.items():
                total_eval_results[f'{dataset_idx}_{k}'] = v
                if self.show_mean_scores:
                    mean_eval_results[k].append(v)

        if self.show_mean_scores:
            for k, v in mean_eval_results.items():
                total_eval_results[f'mean_{k}'] = np.mean(v)

        return total_eval_results
|
@ -1,75 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from mmocr.datasets.base_dataset import BaseDataset
|
||||
|
||||
|
||||
def _create_dummy_ann_file(ann_file):
    """Write a two-line dummy annotation file to ``ann_file``.

    Each line has the form ``<file_name> <text>``.
    """
    samples = ('sample1.jpg hello', 'sample2.jpg world')

    with open(ann_file, 'w') as fw:
        fw.writelines(f'{sample}\n' for sample in samples)
|
||||
|
||||
|
||||
def _create_dummy_loader():
    """Return the loader config used by the dataset test.

    The config describes a ``HardDiskLoader`` whose ``LineStrParser``
    maps every line to the keys ``file_name`` and ``text``.
    """
    parser_cfg = {'type': 'LineStrParser', 'keys': ['file_name', 'text']}
    return {'type': 'HardDiskLoader', 'repeat': 1, 'parser': parser_cfg}
|
||||
|
||||
|
||||
def test_custom_dataset():
    """Check the basic ``BaseDataset`` API on a two-sample dummy file.

    The annotation file lives in a temporary directory that is managed by
    a context manager, so it is removed even when an assertion fails.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        _create_dummy_ann_file(ann_file)
        loader = _create_dummy_loader()

        for mode in [True, False]:
            dataset = BaseDataset(
                ann_file, loader, pipeline=[], test_mode=mode)

            # test len
            assert len(dataset) == len(dataset.data_infos)

            # test set group flag
            assert np.allclose(dataset.flag, [0, 0])

            # test prepare_train_img
            expect_results = {
                'img_info': {
                    'file_name': 'sample1.jpg',
                    'text': 'hello'
                },
                'img_prefix': ''
            }
            assert dataset.prepare_train_img(0) == expect_results

            # test prepare_test_img
            assert dataset.prepare_test_img(0) == expect_results

            # test __getitem__
            assert dataset[0] == expect_results

            # test get_next_index
            assert dataset._get_next_index(0) == 1

            # test format_results: the input must be left unchanged
            expect_results_copy = dict(expect_results)
            dataset.format_results(expect_results)
            assert expect_results_copy == expect_results

            # test evaluate
            with pytest.raises(NotImplementedError):
                dataset.evaluate(expect_results)
|
@ -1,18 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import numpy as np
|
||||
|
||||
import mmocr.datasets.pipelines.dbnet_transforms as transforms
|
||||
|
||||
|
||||
def test_eastrandomcrop():
    """Check that ``EastRandomCrop`` keeps boxes and masks consistent.

    After cropping, the first box must equal the first and third
    vertices of the first polygon.
    """
    crop = transforms.EastRandomCrop(target_size=(60, 60), max_tries=100)

    # Two square polygons and their bounding boxes.
    polygons = np.array([[[0, 0, 50, 0, 50, 50, 0, 50]],
                         [[20, 20, 50, 20, 50, 50, 20, 50]]])
    boxes = np.array([[0, 0, 50, 50], [20, 20, 50, 50]])
    results = {
        'img': np.random.rand(3, 100, 200),
        'gt_masks': polygons,
        'bboxes': boxes,
        'mask_fields': ['gt_masks'],
        'bbox_fields': ['bboxes'],
    }

    results = crop(results)

    first_box = results['bboxes'][0]
    first_poly_corners = results['gt_masks'].masks[0][0][[0, 2]].flatten()
    assert np.allclose(first_box, first_poly_corners)
|
@ -1,84 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import json
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mmocr.datasets.text_det_dataset import TextDetDataset
|
||||
|
||||
|
||||
def _create_dummy_ann_file(ann_file):
|
||||
ann_info1 = {
|
||||
'file_name':
|
||||
'sample1.jpg',
|
||||
'height':
|
||||
640,
|
||||
'width':
|
||||
640,
|
||||
'annotations': [{
|
||||
'iscrowd': 0,
|
||||
'category_id': 1,
|
||||
'bbox': [50, 70, 80, 100],
|
||||
'segmentation': [[50, 70, 80, 70, 80, 100, 50, 100]]
|
||||
}, {
|
||||
'iscrowd':
|
||||
1,
|
||||
'category_id':
|
||||
1,
|
||||
'bbox': [120, 140, 200, 200],
|
||||
'segmentation': [[120, 140, 200, 140, 200, 200, 120, 200]]
|
||||
}]
|
||||
}
|
||||
|
||||
with open(ann_file, 'w') as fw:
|
||||
fw.write(json.dumps(ann_info1) + '\n')
|
||||
|
||||
|
||||
def _create_dummy_loader():
|
||||
loader = dict(
|
||||
type='HardDiskLoader',
|
||||
repeat=1,
|
||||
parser=dict(
|
||||
type='LineJsonParser',
|
||||
keys=['file_name', 'height', 'width', 'annotations']))
|
||||
return loader
|
||||
|
||||
|
||||
def test_detect_dataset():
    """Exercise ``TextDetDataset`` on a one-image dummy annotation file.

    Covers annotation parsing, ``prepare_train_img`` with an empty
    pipeline, and ``hmean-iou`` evaluation.
    """
    # The context manager removes the temporary directory even when an
    # assertion inside fails. It ends where the original test called
    # ``cleanup()``, so the later steps run without the files as before.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        _create_dummy_ann_file(ann_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = TextDetDataset(ann_file, loader, pipeline=[])

        # test _parse_anno_info
        img_ann_info = dataset.data_infos[0]
        ann = dataset._parse_anno_info(img_ann_info['annotations'])
        assert np.allclose(ann['bboxes'], [[50., 70., 80., 100.]])
        assert np.allclose(ann['labels'], [1])
        assert np.allclose(ann['bboxes_ignore'], [[120, 140, 200, 200]])
        assert np.allclose(ann['masks'],
                           [[[50, 70, 80, 70, 80, 100, 50, 100]]])
        assert np.allclose(ann['masks_ignore'],
                           [[[120, 140, 200, 140, 200, 200, 120, 200]]])

    # test prepare_train_img
    pipeline_results = dataset.prepare_train_img(0)
    assert np.allclose(pipeline_results['bbox_fields'], [])
    assert np.allclose(pipeline_results['mask_fields'], [])
    assert np.allclose(pipeline_results['seg_fields'], [])
    expect_img_info = {'filename': 'sample1.jpg', 'height': 640, 'width': 640}
    assert pipeline_results['img_info'] == expect_img_info

    # test evaluation
    metrics = 'hmean-iou'
    results = [{'boundary_result': [[50, 70, 80, 70, 80, 100, 50, 100, 1]]}]
    eval_res = dataset.evaluate(results, metrics)

    assert eval_res['hmean-iou:hmean'] == 1
|
@ -1,171 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import mmcv
|
||||
import numpy as np
|
||||
|
||||
from mmocr.datasets.icdar_dataset import IcdarDataset
|
||||
|
||||
|
||||
def _create_dummy_icdar_json(json_name):
    """Dump a dummy annotation dict with two images to ``json_name``.

    The dict has ``images``, ``annotations`` and ``categories`` entries.
    Image 0 owns four annotations (two with ``iscrowd=0``, two with
    ``iscrowd=1``); image 1 owns a single ``iscrowd=1`` annotation.

    Args:
        json_name (str): Output path handed to ``mmcv.dump``.
    """

    def _image(img_id, file_name):
        # Both dummy images are 640x640.
        return {
            'id': img_id,
            'width': 640,
            'height': 640,
            'file_name': file_name,
        }

    def _annotation(ann_id, image_id, area, bbox, iscrowd, polygon):
        # Every annotation belongs to category 0 and has one polygon.
        return {
            'id': ann_id,
            'image_id': image_id,
            'category_id': 0,
            'area': area,
            'bbox': bbox,
            'iscrowd': iscrowd,
            'segmentation': [polygon]
        }

    images = [_image(0, 'fake_name.jpg'), _image(1, 'fake_name1.jpg')]

    annotations = [
        _annotation(1, 0, 400, [50, 60, 20, 20], 0,
                    [50, 60, 70, 60, 70, 80, 50, 80]),
        _annotation(2, 0, 900, [100, 120, 30, 30], 0,
                    [100, 120, 130, 120, 120, 150, 100, 150]),
        _annotation(3, 0, 1600, [150, 160, 40, 40], 1,
                    [150, 160, 190, 160, 190, 200, 150, 200]),
        _annotation(4, 0, 10000, [250, 260, 100, 100], 1,
                    [250, 260, 350, 260, 350, 360, 250, 360]),
        _annotation(5, 1, 10000, [250, 260, 100, 100], 1,
                    [250, 260, 350, 260, 350, 360, 250, 360]),
    ]

    categories = [{
        'id': 0,
        'name': 'text',
        'supercategory': 'text',
    }]

    fake_json = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }

    mmcv.dump(fake_json, json_name)
|
||||
|
||||
|
||||
def test_icdar_dataset():
    """Exercise ``IcdarDataset`` on a dummy two-image annotation file.

    Covers initialization, ``get_ann_info`` and evaluation with the
    ``hmean-iou`` and ``hmean-ic13`` metrics, including score thresholds.
    """
    # The context manager removes the temporary directory even when an
    # assertion inside fails. It ends where the original test called
    # ``cleanup()``, so evaluation still runs after the files are gone.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        fake_json_file = osp.join(tmp_dir_name, 'fake_data.json')
        _create_dummy_icdar_json(fake_json_file)

        # test initialization
        dataset = IcdarDataset(ann_file=fake_json_file, pipeline=[])
        # NOTE(review): ('text') is a plain string, not a 1-tuple; confirm
        # this is the intended value of CLASSES.
        assert dataset.CLASSES == ('text')
        assert dataset.img_ids == [0, 1]
        assert dataset.select_first_k == -1

        # test _parse_ann_info
        ann = dataset.get_ann_info(0)
        assert np.allclose(ann['bboxes'],
                           [[50., 60., 70., 80.], [100., 120., 130., 150.]])
        assert np.allclose(ann['labels'], [0, 0])
        assert np.allclose(
            ann['bboxes_ignore'],
            [[150., 160., 190., 200.], [250., 260., 350., 360.]])
        assert np.allclose(ann['masks'],
                           [[[50, 60, 70, 60, 70, 80, 50, 80]],
                            [[100, 120, 130, 120, 120, 150, 100, 150]]])
        assert np.allclose(ann['masks_ignore'],
                           [[[150, 160, 190, 160, 190, 200, 150, 200]],
                            [[250, 260, 350, 260, 350, 360, 250, 360]]])
        assert dataset.cat_ids == [0]

    # test rank output
    # result = [[]]
    # out_file = tempfile.NamedTemporaryFile().name

    # with pytest.raises(AssertionError):
    #     dataset.output_ranklist(result, out_file)

    # result = [{'hmean': 1}, {'hmean': 0.5}]

    # output = dataset.output_ranklist(result, out_file)

    # assert output[0]['hmean'] == 0.5

    # test get_gt_mask
    # output = dataset.get_gt_mask()
    # assert np.allclose(output[0][0],
    #                    [[50, 60, 70, 60, 70, 80, 50, 80],
    #                     [100, 120, 130, 120, 120, 150, 100, 150]])
    # assert output[0][1] == []
    # assert np.allclose(output[1][0],
    #                    [[150, 160, 190, 160, 190, 200, 150, 200],
    #                     [250, 260, 350, 260, 350, 360, 250, 360]])
    # assert np.allclose(output[1][1],
    #                    [[250, 260, 350, 260, 350, 360, 250, 360]])

    # test evaluation
    metrics = ['hmean-iou', 'hmean-ic13']
    results = [{
        'boundary_result': [[50, 60, 70, 60, 70, 80, 50, 80, 1],
                            [100, 120, 130, 120, 120, 150, 100, 150, 1]]
    }, {
        'boundary_result': []
    }]
    output = dataset.evaluate(results, metrics)
    assert output['hmean-iou:hmean'] == 1
    assert output['hmean-ic13:hmean'] == 1

    # Lower the score of the first boundary to 0.5 and sweep thresholds.
    results = [{
        'boundary_result': [[50, 60, 70, 60, 70, 80, 50, 80, 0.5],
                            [100, 120, 130, 120, 120, 150, 100, 150, 1]]
    }, {
        'boundary_result': []
    }]
    output = dataset.evaluate(
        results, metrics, min_score_thr=0, max_score_thr=1, step=0.5)
    assert output['hmean-iou:hmean'] == 1
    assert output['hmean-ic13:hmean'] == 1

    output = dataset.evaluate(
        results, metrics, min_score_thr=0.6, max_score_thr=1, step=0.5)
    # 1 / 1.5 is not exactly representable; compare with a tolerance
    # instead of exact float equality.
    assert np.isclose(output['hmean-iou:hmean'], 1 / 1.5)
    assert np.isclose(output['hmean-ic13:hmean'], 1 / 1.5)
|
@ -1,128 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import json
|
||||
import math
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from mmocr.datasets.kie_dataset import KIEDataset
|
||||
|
||||
|
||||
def _create_dummy_ann_file(ann_file):
|
||||
ann_info1 = {
|
||||
'file_name':
|
||||
'sample1.png',
|
||||
'height':
|
||||
200,
|
||||
'width':
|
||||
200,
|
||||
'annotations': [{
|
||||
'text': 'store',
|
||||
'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0],
|
||||
'label': 1
|
||||
}, {
|
||||
'text': 'address',
|
||||
'box': [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0],
|
||||
'label': 1
|
||||
}, {
|
||||
'text': 'price',
|
||||
'box': [33.0, 2.0, 43.0, 2.0, 36.0, 12.0, 25.0, 12.0],
|
||||
'label': 1
|
||||
}, {
|
||||
'text': '1.0',
|
||||
'box': [46.0, 2.0, 61.0, 2.0, 53.0, 12.0, 39.0, 12.0],
|
||||
'label': 1
|
||||
}, {
|
||||
'text': 'google',
|
||||
'box': [61.0, 2.0, 69.0, 2.0, 63.0, 12.0, 55.0, 12.0],
|
||||
'label': 1
|
||||
}]
|
||||
}
|
||||
with open(ann_file, 'w') as fw:
|
||||
for ann_info in [ann_info1]:
|
||||
fw.write(json.dumps(ann_info) + '\n')
|
||||
|
||||
return ann_info1
|
||||
|
||||
|
||||
def _create_dummy_dict_file(dict_file):
|
||||
dict_str = '0123'
|
||||
with open(dict_file, 'w') as fw:
|
||||
for char in list(dict_str):
|
||||
fw.write(char + '\n')
|
||||
|
||||
return dict_str
|
||||
|
||||
|
||||
def _create_dummy_loader():
|
||||
loader = dict(
|
||||
type='HardDiskLoader',
|
||||
repeat=1,
|
||||
parser=dict(
|
||||
type='LineJsonParser',
|
||||
keys=['file_name', 'height', 'width', 'annotations']))
|
||||
return loader
|
||||
|
||||
|
||||
def test_kie_dataset():
    """Exercise ``KIEDataset`` on a dummy annotation and dict file.

    Covers initialization, ``pre_pipeline``, ``_parse_anno_info`` (valid
    and invalid inputs) and ``evaluate``.
    """
    # The context manager removes the temporary directory even when dataset
    # construction fails. It ends where the original test called
    # ``cleanup()``, so the remaining steps run without the files as before.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        ann_info1 = _create_dummy_ann_file(ann_file)

        dict_file = osp.join(tmp_dir_name, 'fake_dict.txt')
        _create_dummy_dict_file(dict_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = KIEDataset(ann_file, loader, dict_file, pipeline=[])

    dataset.prepare_train_img(0)

    # test pre_pipeline
    img_ann_info = dataset.data_infos[0]
    img_info = {
        'filename': img_ann_info['file_name'],
        'height': img_ann_info['height'],
        'width': img_ann_info['width']
    }
    ann_info = dataset._parse_anno_info(img_ann_info['annotations'])
    results = dict(img_info=img_info, ann_info=ann_info)
    dataset.pre_pipeline(results)
    assert results['img_prefix'] == dataset.img_prefix

    # test _parse_anno_info
    annos = ann_info1['annotations']
    # A single annotation dict instead of a list must be rejected.
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(annos[0])
    # An annotation without 'label' is accepted.
    tmp_annos = [{
        'text': 'store',
        'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]
    }]
    dataset._parse_anno_info(tmp_annos)
    # An annotation without 'box' must be rejected.
    tmp_annos = [{'text': 'store'}]
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(tmp_annos)

    return_anno = dataset._parse_anno_info(annos)
    assert 'bboxes' in return_anno
    assert 'relations' in return_anno
    assert 'texts' in return_anno
    assert 'labels' in return_anno

    # test evaluation
    result = {}
    result['nodes'] = torch.full((5, 5), 1, dtype=torch.float)
    result['nodes'][:, 1] = 100.
    results = [result for _ in range(5)]

    eval_res = dataset.evaluate(results)
    assert math.isclose(eval_res['macro_f1'], 0.2, abs_tol=1e-4)


# Guarded so that importing the module (e.g. during pytest collection) does
# not run the test a second time; direct script execution still runs it.
if __name__ == '__main__':
    test_kie_dataset()
|
@ -1,96 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import json
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from mmocr.datasets.utils.backend import (HardDiskAnnFileBackend,
|
||||
HTTPAnnFileBackend,
|
||||
PetrelAnnFileBackend)
|
||||
from mmocr.datasets.utils.loader import AnnFileLoader
|
||||
from mmocr.utils import recog2lmdb
|
||||
|
||||
|
||||
def _create_dummy_line_str_file(ann_file):
|
||||
ann_info1 = 'sample1.jpg hello'
|
||||
ann_info2 = 'sample2.jpg world'
|
||||
|
||||
with open(ann_file, 'w') as fw:
|
||||
for ann_info in [ann_info1, ann_info2]:
|
||||
fw.write(ann_info + '\n')
|
||||
|
||||
|
||||
def _create_dummy_line_json_file(ann_file):
|
||||
ann_info1 = {'filename': 'sample1.jpg', 'text': 'hello'}
|
||||
ann_info2 = {'filename': 'sample2.jpg', 'text': 'world'}
|
||||
|
||||
with open(ann_file, 'w') as fw:
|
||||
for ann_info in [ann_info1, ann_info2]:
|
||||
fw.write(json.dumps(ann_info) + '\n')
|
||||
|
||||
|
||||
def test_loader():
    """Exercise ``AnnFileLoader`` with txt and lmdb inputs.

    Covers invalid constructor arguments, the line-string and line-JSON
    parsers, iteration to exhaustion, an lmdb-backed loader, and the
    ``file_format='json'`` rejection of the three backends.
    """
    # The context manager removes the temporary directory even when an
    # assertion fails part-way through the test.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        _create_dummy_line_str_file(ann_file)

        parser = dict(
            type='LineStrParser',
            keys=['filename', 'text'],
            keys_idx=[0, 1],
            separator=' ')

        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, parser, repeat=0)
        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, [], repeat=1)

        # test text loader and line str parser
        text_loader = AnnFileLoader(
            ann_file, parser, repeat=1, file_format='txt')
        assert len(text_loader) == 2
        assert text_loader.ori_data_infos[0] == 'sample1.jpg hello'
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test text loader and linedict parser
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = AnnFileLoader(
            ann_file, json_parser, repeat=1, file_format='txt')
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test that iterating past the last item raises StopIteration
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = AnnFileLoader(
            ann_file, json_parser, repeat=1, file_format='txt')
        it = iter(text_loader)
        with pytest.raises(StopIteration):
            for _ in range(len(text_loader) + 1):
                next(it)

        # test lmdb loader and line json parser
        _create_dummy_line_str_file(ann_file)
        lmdb_file = osp.join(tmp_dir_name, 'fake_data.lmdb')
        recog2lmdb(
            img_root=None,
            label_path=ann_file,
            label_only=True,
            output=lmdb_file,
            lmdb_map_size=102400)

        parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        lmdb_loader = AnnFileLoader(
            lmdb_file, parser, repeat=1, file_format='lmdb')
        assert lmdb_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}
        lmdb_loader.close()

        # every backend rejects file_format='json'
        with pytest.raises(AssertionError):
            HardDiskAnnFileBackend(file_format='json')
        with pytest.raises(AssertionError):
            PetrelAnnFileBackend(file_format='json')
        with pytest.raises(AssertionError):
            HTTPAnnFileBackend(file_format='json')
|
@ -1,86 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mmocr.datasets.pipelines import LoadImageFromNdarray, LoadTextAnnotations
|
||||
|
||||
|
||||
def _create_dummy_ann():
|
||||
results = {}
|
||||
results['img_info'] = {}
|
||||
results['img_info']['height'] = 1000
|
||||
results['img_info']['width'] = 1000
|
||||
results['ann_info'] = {}
|
||||
results['ann_info']['masks'] = []
|
||||
results['mask_fields'] = []
|
||||
results['ann_info']['masks_ignore'] = [
|
||||
[[499, 94, 531, 94, 531, 124, 499, 124]],
|
||||
[[3, 156, 81, 155, 78, 181, 0, 182]],
|
||||
[[11, 223, 59, 221, 59, 234, 11, 236]],
|
||||
[[500, 156, 551, 156, 550, 165, 499, 165]]
|
||||
]
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def test_loadtextannotation():
    """Check ``LoadTextAnnotations._load_masks`` with ``use_img_shape=True``.

    Run once without and once with ``ori_shape`` in the results dict.
    """
    dummy = _create_dummy_ann()
    # Positional flags shared by both loaders:
    # with_bbox, with_label, with_mask, with_seg.
    common_flags = (True, True, True, False)

    # If no 'ori_shape' in result but use_img_shape=True,
    # result['img_info']['height'] and result['img_info']['width']
    # will be used to generate mask.
    polygon_loader = LoadTextAnnotations(
        *common_flags, False, use_img_shape=True)
    loaded = polygon_loader._load_masks(copy.deepcopy(dummy))
    ignored = loaded['gt_masks_ignore']
    assert len(ignored) == 4
    assert np.allclose(ignored.masks[0],
                       [[499, 94, 531, 94, 531, 124, 499, 124]])
    assert ignored.height == dummy['img_info']['height']

    # If 'ori_shape' in result and use_img_shape=True,
    # result['ori_shape'] will be used to generate mask.
    bitmap_loader = LoadTextAnnotations(
        *common_flags, poly2mask=True, use_img_shape=True)
    with_shape = copy.deepcopy(dummy)
    with_shape['ori_shape'] = (640, 640, 3)
    loaded = bitmap_loader._load_masks(with_shape)
    assert loaded['img_info']['height'] == 640
    assert loaded['gt_masks_ignore'].height == 640
|
||||
|
||||
|
||||
def test_load_img_from_numpy():
    """Check the channel count ``LoadImageFromNdarray`` produces.

    Three cases: colour input loaded as colour, single-channel input loaded
    as colour, and colour input loaded as grayscale with ``to_float32``.
    """

    def _load(channels, **loader_kwargs):
        # Build a 32x100 uint8 image of ones and push it through the loader.
        image = np.ones((32, 100, channels), dtype=np.uint8)
        return LoadImageFromNdarray(**loader_kwargs)({'img': image})

    colour_out = _load(3, color_type='color')
    assert colour_out['img'].shape[2] == 3
    assert len(colour_out['img'].shape) == 3

    expanded_out = _load(1, color_type='color')
    assert expanded_out['img'].shape[2] == 3

    gray_out = _load(3, color_type='grayscale', to_float32=True)
    assert gray_out['img'].shape[2] == 1
|
@ -1,114 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import json
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import torch
|
||||
|
||||
from mmocr.datasets.ner_dataset import NerDataset
|
||||
from mmocr.models.ner.convertors.ner_convertor import NerConvertor
|
||||
from mmocr.utils import list_to_file
|
||||
|
||||
|
||||
def _create_dummy_ann_file(ann_file):
    """Write one JSON record (``text`` plus entity ``label``) to a file.

    The record is serialized with ``ensure_ascii=False`` and handed to
    ``list_to_file`` as a single-element list.

    Args:
        ann_file (str): Output path passed to ``list_to_file``.
    """
    entity_spans = {
        'address': {
            '台湾': [[15, 16]]
        },
        'name': {
            '彭小军': [[0, 2]]
        }
    }
    data = {
        'text': '彭小军认为,国内银行现在走的是台湾的发卡模式',
        'label': entity_spans
    }
    serialized = json.dumps(data, ensure_ascii=False)

    list_to_file(ann_file, [serialized])
|
||||
|
||||
|
||||
def _create_dummy_vocab_file(vocab_file):
    """Call ``list_to_file`` once per lowercase letter ``a``-``z``.

    Each call passes a one-element list holding the JSON encoding of the
    letter followed by a newline.

    Args:
        vocab_file (str): Output path passed to ``list_to_file``.
    """
    # NOTE(review): if list_to_file truncates the target on every call, only
    # the last letter survives in the file. Confirm this is intended.
    for code_point in range(ord('a'), ord('z') + 1):
        entry = json.dumps(chr(code_point) + '\n', ensure_ascii=False)
        list_to_file(vocab_file, [entry])
|
||||
|
||||
|
||||
def _create_dummy_loader():
|
||||
loader = dict(
|
||||
type='HardDiskLoader',
|
||||
repeat=1,
|
||||
parser=dict(type='LineJsonParser', keys=['text', 'label']))
|
||||
return loader
|
||||
|
||||
|
||||
def test_ner_dataset():
    """Exercise ``NerDataset`` and ``NerConvertor.convert_pred2entities``.

    Covers dataset construction with a ``NerTransform`` / ``ToTensorNER``
    pipeline, ``pre_pipeline``, ``prepare_train_img``, ``evaluate`` and the
    conversion of a prediction sequence into entities.
    """
    # test initialization
    loader = _create_dummy_loader()
    categories = [
        'address', 'book', 'company', 'game', 'government', 'movie', 'name',
        'organization', 'position', 'scene'
    ]

    # The context manager removes the temporary directory even when an
    # assertion fails; the vocab file is used until the end of the test.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        vocab_file = osp.join(tmp_dir_name, 'fake_vocab.txt')
        _create_dummy_ann_file(ann_file)
        _create_dummy_vocab_file(vocab_file)

        max_len = 128
        ner_convertor = dict(
            type='NerConvertor',
            annotation_type='bio',
            vocab_file=vocab_file,
            categories=categories,
            max_len=max_len)

        test_pipeline = [
            dict(
                type='NerTransform',
                label_convertor=ner_convertor,
                max_len=max_len),
            dict(type='ToTensorNER')
        ]
        dataset = NerDataset(ann_file, loader, pipeline=test_pipeline)

        # test pre_pipeline
        img_info = dataset.data_infos[0]
        results = dict(img_info=img_info)
        dataset.pre_pipeline(results)

        # test prepare_train_img
        dataset.prepare_train_img(0)

        # test evaluation
        result = [[['address', 15, 16], ['name', 0, 2]]]

        dataset.evaluate(result)

        # test pred convert2entity function
        pred = [
            21, 7, 17, 17, 21, 21, 21, 21, 21, 21, 13, 21, 21, 21, 21, 21, 1,
            11, 21, 21, 7, 17, 17, 21, 21, 21, 21, 21, 21, 13, 21, 21, 21, 21,
            21, 1, 11, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 1, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            21, 21, 21, 21, 1, 21, 21, 21, 21, 21, 21
        ]
        # Only the first 128 predictions are used, matching the mask length.
        preds = [pred[:128]]
        # The mask marks the first 10 positions with 1 and the rest with 0.
        mask = [1] * 10 + [0] * 118
        assert len(preds[0]) == len(mask)
        masks = torch.tensor([mask])
        convertor = NerConvertor(
            annotation_type='bio',
            vocab_file=vocab_file,
            categories=categories,
            max_len=128)
        all_entities = convertor.convert_pred2entities(
            preds=preds, masks=masks)
        assert len(all_entities[0][0]) == 3
|
@ -1,75 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import math
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
from mmocr.datasets.ocr_dataset import OCRDataset
|
||||
|
||||
|
||||
def _create_dummy_ann_file(ann_file):
|
||||
ann_info1 = 'sample1.jpg hello'
|
||||
ann_info2 = 'sample2.jpg world'
|
||||
|
||||
with open(ann_file, 'w') as fw:
|
||||
for ann_info in [ann_info1, ann_info2]:
|
||||
fw.write(ann_info + '\n')
|
||||
|
||||
|
||||
def _create_dummy_loader():
|
||||
loader = dict(
|
||||
type='HardDiskLoader',
|
||||
repeat=1,
|
||||
parser=dict(type='LineStrParser', keys=['file_name', 'text']))
|
||||
return loader
|
||||
|
||||
|
||||
def test_detect_dataset():
    """Exercise ``OCRDataset`` pre-processing and its evaluation metrics.

    Covers ``pre_pipeline`` and ``evaluate`` with the ``acc`` metric, single
    and multiple named metrics, and the case/symbol-insensitive variants.
    """
    # The context manager removes the temporary directory even when dataset
    # construction fails. It ends where the original test called
    # ``cleanup()``, so the remaining steps run without the files as before.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        _create_dummy_ann_file(ann_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = OCRDataset(ann_file, loader, pipeline=[])

    # test pre_pipeline
    img_info = dataset.data_infos[0]
    results = dict(img_info=img_info)
    dataset.pre_pipeline(results)
    assert results['img_prefix'] == dataset.img_prefix
    assert results['text'] == img_info['text']

    # test evaluation
    metric = 'acc'
    results = [{'text': 'hello'}, {'text': 'worl'}]
    eval_res = dataset.evaluate(results, metric)

    assert math.isclose(eval_res['word_acc'], 0.5, abs_tol=1e-4)
    assert math.isclose(eval_res['char_precision'], 1.0, abs_tol=1e-4)
    assert math.isclose(eval_res['char_recall'], 0.9, abs_tol=1e-4)

    # A single named metric yields exactly one entry.
    eval_res = dataset.evaluate(results, metric='word_acc')
    assert math.isclose(eval_res['word_acc'], 0.5, abs_tol=1e-4)
    assert len(eval_res) == 1

    # A list of named metrics yields one entry per metric.
    eval_res = dataset.evaluate(
        results, metric=['char_precision', 'char_recall'])
    assert math.isclose(eval_res['char_precision'], 1.0, abs_tol=1e-4)
    assert math.isclose(eval_res['char_recall'], 0.9, abs_tol=1e-4)
    assert len(eval_res) == 2

    results = [{'text': 'HELLO*'}, {'text': 'worl'}]
    eval_res = dataset.evaluate(
        results,
        metric=[
            'word_acc_ignore_case_symbol', 'word_acc_ignore_case',
            'one_minus_ned'
        ])
    assert math.isclose(
        eval_res['word_acc_ignore_case_symbol'], 0.5, abs_tol=1e-4)
    assert math.isclose(eval_res['word_acc_ignore_case'], 0, abs_tol=1e-4)
    # 'one_minus_ned' is reported under the '1-N.E.D' key.
    assert math.isclose(eval_res['1-N.E.D'], 0.9, abs_tol=1e-4)
    assert len(eval_res) == 3
|
@ -1,98 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import json
|
||||
import math
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import torch
|
||||
|
||||
from mmocr.datasets.openset_kie_dataset import OpensetKIEDataset
|
||||
from mmocr.utils import list_to_file
|
||||
|
||||
|
||||
def _create_dummy_ann_file(ann_file):
    """Write a JSON annotation record with two text boxes to a file.

    Each annotation has a ``text``, an 8-value ``box``, a ``label`` and an
    ``edge``. The record is handed to ``list_to_file`` as one JSON string.

    Args:
        ann_file (str): Output path passed to ``list_to_file``.

    Returns:
        dict: The annotation record that was written.
    """
    text_boxes = [{
        'text': 'store',
        'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0],
        'label': 1,
        'edge': 1
    }, {
        'text': 'MyFamily',
        'box': [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0],
        'label': 2,
        'edge': 1
    }]
    ann_info1 = {
        'file_name': '1.png',
        'height': 200,
        'width': 200,
        'annotations': text_boxes
    }
    serialized = json.dumps(ann_info1)
    list_to_file(ann_file, [serialized])

    return ann_info1
|
||||
|
||||
|
||||
def _create_dummy_dict_file(dict_file):
    """Pass the characters ``0``-``3`` to ``list_to_file`` as a list.

    Args:
        dict_file (str): Output path passed to ``list_to_file``.
    """
    characters = list('0123')
    list_to_file(dict_file, characters)
|
||||
|
||||
|
||||
def _create_dummy_loader():
|
||||
loader = dict(
|
||||
type='HardDiskLoader',
|
||||
repeat=1,
|
||||
parser=dict(
|
||||
type='LineJsonParser',
|
||||
keys=['file_name', 'height', 'width', 'annotations']))
|
||||
return loader
|
||||
|
||||
|
||||
def test_openset_kie_dataset():
    """Exercise ``OpensetKIEDataset`` on a dummy two-box annotation file.

    Covers initialization, ``prepare_train_img``, ``pre_pipeline`` and the
    ``edge_openset_f1`` value returned by ``evaluate``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        dict_file = osp.join(tmp_dir_name, 'fake_dict.txt')
        ann_info1 = _create_dummy_ann_file(ann_file)
        _create_dummy_dict_file(dict_file)

        # test initialization
        dataset = OpensetKIEDataset(
            ann_file, _create_dummy_loader(), dict_file, pipeline=[])

        dataset.prepare_train_img(0)

        # test pre_pipeline
        raw_info = dataset.data_infos[0]
        results = dict(
            img_info={
                'filename': raw_info['file_name'],
                'height': raw_info['height'],
                'width': raw_info['width']
            },
            ann_info=dataset._parse_anno_info(raw_info['annotations']))
        dataset.pre_pipeline(results)
        assert results['img_prefix'] == dataset.img_prefix
        assert 'ori_texts' in results

        # test evaluation
        annotations = ann_info1['annotations']
        img_meta = {
            'filename': ann_info1['file_name'],
            'ori_filename': ann_info1['file_name'],
            'ori_texts': [anno['text'] for anno in annotations],
            'ori_bboxes': [anno['box'] for anno in annotations]
        }
        result = {
            'img_metas': [img_meta],
            'nodes': torch.tensor([[0.01, 0.8, 0.01, 0.18],
                                   [0.01, 0.01, 0.9, 0.08]]),
            'edges': torch.Tensor([[0.01, 0.99]] * 4),
        }

        eval_res = dataset.evaluate([result])
        assert math.isclose(eval_res['edge_openset_f1'], 1.0, abs_tol=1e-4)
|
@ -1,64 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from mmocr.datasets.utils.parser import LineJsonParser, LineStrParser
|
||||
|
||||
|
||||
def test_line_str_parser():
    """Check ``LineStrParser`` argument validation and ``get_item``."""
    data_ret = ['sample1.jpg hello\n', 'sample2.jpg world']
    keys = ['filename', 'text']
    keys_idx = [0, 1]
    separator = ' '

    # test init: each argument tuple below must trip an assertion
    invalid_arguments = [
        ('filename', keys_idx, separator),
        (keys, keys_idx, [' ']),
        (keys, [0], separator),
    ]
    for bad_args in invalid_arguments:
        with pytest.raises(AssertionError):
            LineStrParser(*bad_args)

    # test get_item
    parser = LineStrParser(keys, keys_idx, separator)
    expected = {'filename': 'sample1.jpg', 'text': 'hello'}
    assert parser.get_item(data_ret, 0) == expected

    # Asking for a third column must raise somewhere in construction or
    # in get_item.
    with pytest.raises(Exception):
        parser = LineStrParser(['filename', 'text', 'ignore'], [0, 1, 2],
                               separator)
        parser.get_item(data_ret, 0)
|
||||
|
||||
|
||||
def test_line_dict_parser():
    """Check ``LineJsonParser`` argument validation and ``get_item``."""
    records = [
        {'filename': 'sample1.jpg', 'text': 'hello'},
        {'filename': 'sample2.jpg', 'text': 'world'},
    ]
    data_ret = [json.dumps(record) for record in records]
    keys = ['filename', 'text']

    # test init: a bare string and an empty list must trip an assertion
    for bad_keys in ('filename', []):
        with pytest.raises(AssertionError):
            LineJsonParser(bad_keys)

    # test get_item
    parser = LineJsonParser(keys)
    expected = {'filename': 'sample1.jpg', 'text': 'hello'}
    assert parser.get_item(data_ret, 0) == expected

    # Asking for a key the records do not contain must raise somewhere in
    # construction or in get_item.
    with pytest.raises(Exception):
        parser = LineJsonParser(['img_name', 'text'])
        parser.get_item(data_ret, 0)
|
@ -1,34 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from mmocr.datasets.pipelines.test_time_aug import MultiRotateAugOCR
|
||||
|
||||
|
||||
def test_resize_ocr():
    """Check ``MultiRotateAugOCR`` argument validation and output counts.

    With ``rotate_degrees=[0, 90, 270]`` the 64x256 input yields one image
    and the 64x32 input yields three.
    """
    wide_img = np.ones((64, 256, 3), dtype=np.uint8)
    tall_img = np.ones((64, 32, 3), dtype=np.uint8)

    rci = MultiRotateAugOCR(transforms=[], rotate_degrees=[0, 90, 270])

    # test invalid arguments
    for bad_degrees in ([45], [20.5]):
        with pytest.raises(AssertionError):
            MultiRotateAugOCR(transforms=[], rotate_degrees=bad_degrees)

    # test call with the wide image
    augmented = rci({'img_shape': wide_img.shape, 'img': wide_img})
    assert np.allclose([64, 256, 3], augmented['img_shape'])
    assert len(augmented['img']) == 1
    assert len(augmented['img_shape']) == 1
    assert np.allclose([64, 256, 3], augmented['img_shape'][0])

    # test call with the tall image
    augmented = rci({'img_shape': tall_img.shape, 'img': tall_img})
    assert np.allclose([64, 32, 3], augmented['img_shape'])
    assert len(augmented['img']) == 3
    assert len(augmented['img_shape']) == 3
    assert np.allclose([64, 32, 3], augmented['img_shape'][0])
|
@ -1,263 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from unittest import mock
|
||||
|
||||
import numpy as np
|
||||
from mmdet.core import PolygonMasks
|
||||
|
||||
import mmocr.datasets.pipelines.custom_format_bundle as cf_bundle
|
||||
import mmocr.datasets.pipelines.textdet_targets as textdet_targets
|
||||
|
||||
|
||||
@mock.patch('%s.cf_bundle.show_feature' % __name__)
def test_gen_pannet_targets(mock_show_feature):
    """Exercise ``PANetTargets`` and ``CustomFormatBundle`` visualization.

    Covers ``generate_kernels``, ``generate_effective_mask``, the full
    target generation call, and that the bundle calls the patched
    ``show_feature`` exactly once.

    Args:
        mock_show_feature (mock.Mock): Replacement for
            ``cf_bundle.show_feature`` injected by ``mock.patch``.
    """
    target_generator = textdet_targets.PANetTargets()
    assert target_generator.max_shrink == 20

    # test generate_kernels
    img_size = (3, 10)
    text_polys = [[np.array([0, 0, 1, 0, 1, 1, 0, 1])],
                  [np.array([2, 0, 3, 0, 3, 1, 2, 1])]]
    shrink_ratio = 1.0
    kernel = np.array([[1, 1, 2, 2, 0, 0, 0, 0, 0, 0],
                       [1, 1, 2, 2, 0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    output, _ = target_generator.generate_kernels(img_size, text_polys,
                                                  shrink_ratio)
    assert np.allclose(output, kernel)

    # test generate_effective_mask
    polys_ignore = text_polys
    output = target_generator.generate_effective_mask((3, 10), polys_ignore)
    target = np.array([[0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

    assert np.allclose(output, target)

    # test generate_targets
    results = {}
    results['img'] = np.zeros((3, 10, 3), np.uint8)
    results['gt_masks'] = PolygonMasks(text_polys, 3, 10)
    results['gt_masks_ignore'] = PolygonMasks([], 3, 10)
    results['img_shape'] = (3, 10, 3)
    results['mask_fields'] = []
    output = target_generator(results)
    assert len(output['gt_kernels']) == 2
    assert len(output['gt_mask']) == 1

    bundle = cf_bundle.CustomFormatBundle(
        keys=['gt_kernels', 'gt_mask'],
        visualize=dict(flag=True, boundary_key='gt_kernels'))
    bundle(output)
    assert 'gt_kernels' in output.keys()
    assert 'gt_mask' in output.keys()
    mock_show_feature.assert_called_once()
|
||||
|
||||
|
||||
def test_gen_psenet_targets():
    """Check the default attributes of a freshly built PSENetTargets."""
    psenet_targets = textdet_targets.PSENetTargets()
    expected_ratios = (1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4)

    assert psenet_targets.max_shrink == 20
    assert psenet_targets.shrink_ratio == expected_ratios
|
||||
|
||||
|
||||
# Test DBNetTargets
|
||||
|
||||
|
||||
def test_dbnet_targets_find_invalid():
    """Check DBNetTargets defaults and find_invalid on two square polygons.

    Neither polygon is expected to be tagged as invalid.
    """
    dbnet_targets = textdet_targets.DBNetTargets()
    assert dbnet_targets.shrink_ratio == 0.4
    assert dbnet_targets.thr_min == 0.3
    assert dbnet_targets.thr_max == 0.7

    squares = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
               [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    results = {'gt_masks': PolygonMasks(squares, 40, 40)}

    tags = dbnet_targets.find_invalid(results)
    assert np.allclose(tags, [False, False])
|
||||
|
||||
|
||||
def test_dbnet_targets():
    """Check the default shrink ratio and threshold bounds of DBNetTargets."""
    dbnet_targets = textdet_targets.DBNetTargets()

    assert dbnet_targets.shrink_ratio == 0.4
    assert dbnet_targets.thr_min == 0.3
    assert dbnet_targets.thr_max == 0.7
|
||||
|
||||
|
||||
def test_dbnet_ignore_texts():
    """Check that ignore_texts moves a tagged polygon to the ignore set.

    The first of two ground-truth polygons is tagged as ignored; afterwards
    one label and one mask must remain, and the ignore masks must have grown
    to two entries with the moved polygon at index 1.
    """
    dbnet_targets = textdet_targets.DBNetTargets()
    text_polys = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
                  [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    text_polys_ignore = [[np.array([0, 0, 15, 0, 15, 10, 0, 10])]]

    results = {
        'gt_masks_ignore': PolygonMasks(text_polys_ignore, 40, 40),
        'gt_masks': PolygonMasks(text_polys, 40, 40),
        'gt_bboxes': np.array([[0, 0, 10, 10], [20, 0, 30, 10]]),
        'gt_labels': np.array([0, 1]),
    }

    dbnet_targets.ignore_texts(results, [True, False])

    assert np.allclose(results['gt_labels'], np.array([1]))
    ignored = results['gt_masks_ignore'].masks
    assert len(ignored) == 2
    assert np.allclose(ignored[1][0], text_polys[0][0])
    assert len(results['gt_masks'].masks) == 1
|
||||
|
||||
|
||||
def test_dbnet_generate_thr_map():
    """Check that every value of the generated threshold map is bounded.

    All entries must lie within [0.29, 0.71].
    """
    dbnet_targets = textdet_targets.DBNetTargets()
    polygons = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
                [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]

    thr_map, _ = dbnet_targets.generate_thr_map((40, 40), polygons)

    within_bounds = (thr_map >= 0.29) * (thr_map <= 0.71)
    assert np.all(within_bounds)
|
||||
|
||||
|
||||
def test_dbnet_draw_border_map():
    """Smoke-test draw_border_map with a polygon that has negative x values.

    There are no assertions; the call only has to complete without raising.
    """
    dbnet_targets = textdet_targets.DBNetTargets()
    map_size = (40, 40)
    polygon = np.array([[20, 21], [-14, 20], [-11, 30], [-22, 26]])
    border_map = np.zeros(map_size, dtype=np.float32)
    border_mask = np.zeros(map_size, dtype=np.uint8)

    dbnet_targets.draw_border_map(polygon, border_map, border_mask)
|
||||
|
||||
|
||||
def test_dbnet_generate_targets():
    """Check that generate_targets registers the four DBNet mask fields."""
    dbnet_targets = textdet_targets.DBNetTargets()
    text_polys = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
                  [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    text_polys_ignore = [[np.array([0, 0, 15, 0, 15, 10, 0, 10])]]

    results = {
        'mask_fields': [],
        'img_shape': (40, 40, 3),
        'gt_masks_ignore': PolygonMasks(text_polys_ignore, 40, 40),
        'gt_masks': PolygonMasks(text_polys, 40, 40),
        'gt_bboxes': np.array([[0, 0, 10, 10], [20, 0, 30, 10]]),
        'gt_labels': np.array([0, 1]),
    }

    dbnet_targets.generate_targets(results)

    for field in ('gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'):
        assert field in results['mask_fields']
|
||||
|
||||
|
||||
def test_fcenet_generate_targets():
    """Check that FCENetTargets adds the p3/p4/p5 maps to the results."""
    fcenet_targets = textdet_targets.FCENetTargets(fourier_degree=5)

    h, w, c = (64, 64, 3)
    text_polys = [[np.array([0, 0, 10, 0, 10, 10, 0, 10])],
                  [np.array([20, 0, 30, 0, 30, 10, 20, 10])]]
    text_polys_ignore = [[np.array([0, 0, 15, 0, 15, 10, 0, 10])]]

    results = {
        'mask_fields': [],
        'img_shape': (h, w, c),
        'gt_masks_ignore': PolygonMasks(text_polys_ignore, h, w),
        'gt_masks': PolygonMasks(text_polys, h, w),
        'gt_bboxes': np.array([[0, 0, 10, 10], [20, 0, 30, 10]]),
        'gt_labels': np.array([0, 1]),
    }

    fcenet_targets.generate_targets(results)

    for level_key in ('p3_maps', 'p4_maps', 'p5_maps'):
        assert level_key in results.keys()
|
||||
|
||||
|
||||
def test_gen_drrg_targets():
    """Check DRRGTargets defaults and its output under several set-ups.

    Covered set-ups: four text polygons, a capped number of components,
    empty polygon masks, a single small polygon, and wide width/height
    ranges.
    """
    target_generator = textdet_targets.DRRGTargets()
    float_defaults = {
        'orientation_thr': 2.0,
        'resample_step': 8.0,
        'min_width': 8.0,
        'max_width': 24.0,
        'center_region_shrink_ratio': 0.3,
        'comp_shrink_ratio': 1.0,
        'comp_w_h_ratio': 0.3,
        'text_comp_nms_thr': 0.25,
        'min_rand_half_height': 8.0,
        'max_rand_half_height': 24.0,
        'jitter_level': 0.2,
    }
    for attr_name, expected in float_defaults.items():
        assert np.allclose(getattr(target_generator, attr_name), expected)
    assert target_generator.num_min_comps == 9
    assert target_generator.num_max_comps == 600

    # Keyword arguments shared by most generators built below.
    narrow_cfg = dict(
        min_width=2.,
        max_width=4.,
        min_rand_half_height=3.,
        max_rand_half_height=5.)

    # test generate_targets
    target_generator = textdet_targets.DRRGTargets(**narrow_cfg)

    results = {'img': np.zeros((64, 64, 3), np.uint8)}
    text_polys = [[np.array([4, 2, 30, 2, 30, 10, 4, 10])],
                  [np.array([36, 12, 8, 12, 8, 22, 36, 22])],
                  [np.array([48, 20, 52, 20, 52, 50, 48, 50])],
                  [np.array([44, 50, 38, 50, 38, 20, 44, 20])]]
    # NOTE(review): the masks are declared as 20x30 while the image and the
    # ignore masks are 64x64 - confirm whether this mismatch is intended.
    results['gt_masks'] = PolygonMasks(text_polys, 20, 30)
    results['gt_masks_ignore'] = PolygonMasks([], 64, 64)
    results['img_shape'] = (64, 64, 3)
    results['mask_fields'] = []
    output = target_generator(results)
    single_entry_keys = ('gt_text_mask', 'gt_center_region_mask', 'gt_mask',
                         'gt_top_height_map', 'gt_bot_height_map',
                         'gt_sin_map', 'gt_cos_map')
    for key in single_entry_keys:
        assert len(output[key]) == 1
    assert output['gt_comp_attribs'].shape[-1] == 8

    # test generate_targets with the number of proposed text components
    # exceeds num_max_comps
    target_generator = textdet_targets.DRRGTargets(
        num_max_comps=6, **narrow_cfg)
    output = target_generator(results)
    comp_attribs = output['gt_comp_attribs']
    assert comp_attribs.ndim == 2
    assert comp_attribs.shape[0] == 6

    # test generate_targets with blank polygon masks
    target_generator = textdet_targets.DRRGTargets(**narrow_cfg)
    results = {
        'img': np.zeros((20, 30, 3), np.uint8),
        'gt_masks': PolygonMasks([], 20, 30),
        'gt_masks_ignore': PolygonMasks([], 20, 30),
        'img_shape': (20, 30, 3),
        'mask_fields': [],
    }
    output = target_generator(results)
    assert output['gt_comp_attribs'][0, 0] > 8

    # test generate_targets with one proposed text component
    text_polys = [[np.array([13, 6, 17, 6, 17, 14, 13, 14])]]
    target_generator = textdet_targets.DRRGTargets(
        min_width=4.,
        max_width=8.,
        min_rand_half_height=3.,
        max_rand_half_height=5.)
    results['gt_masks'] = PolygonMasks(text_polys, 20, 30)
    output = target_generator(results)
    assert output['gt_comp_attribs'][0, 0] > 8

    # test generate_targets with shrunk margin in generate_rand_comp_attribs
    target_generator = textdet_targets.DRRGTargets(
        min_width=2.,
        max_width=30.,
        min_rand_half_height=3.,
        max_rand_half_height=30.)
    output = target_generator(results)
    assert output['gt_comp_attribs'][0, 0] > 8
|
@ -1,66 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
import unittest.mock as mock
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from mmocr.datasets.pipelines import (OneOfWrapper, RandomWrapper,
|
||||
TorchVisionWrapper)
|
||||
from mmocr.datasets.pipelines.transforms import ColorJitter
|
||||
|
||||
|
||||
def test_torchvision_wrapper():
    """Check TorchVisionWrapper construction errors and a Grayscale run."""
    sample = {'img': np.ones((128, 100, 3), dtype=np.uint8)}

    # An unknown op name must be rejected at construction time.
    with pytest.raises(Exception):
        TorchVisionWrapper(op='NonExist')
    # The op argument is mandatory.
    with pytest.raises(TypeError):
        TorchVisionWrapper()

    to_gray = TorchVisionWrapper('Grayscale')
    # An input dict without the expected content trips an assertion.
    with pytest.raises(AssertionError):
        to_gray({})

    results = to_gray(sample)
    expected_shape = (128, 100)
    assert results['img'].shape == expected_shape
    assert results['img_shape'] == expected_shape
|
||||
|
||||
|
||||
@mock.patch('random.choice')
def test_oneof(rand_choice):
    """Check OneOfWrapper with a mocked ``random.choice``.

    Args:
        rand_choice: Mock injected by ``mock.patch`` in place of
            ``random.choice``; its side effect picks a fixed index.
    """
    jitter_cfg = dict(type='TorchVisionWrapper', op='ColorJitter')
    gray_cfg = dict(type='TorchVisionWrapper', op='Grayscale')
    sample = {
        'img': np.random.randint(0, 256, size=(128, 100, 3), dtype=np.uint8)
    }
    one_of = OneOfWrapper([jitter_cfg, gray_cfg])

    # Select index 0 (the ColorJitter config): three channels are kept.
    rand_choice.side_effect = lambda seq: seq[0]
    results = one_of(sample)
    assert results['img'].shape == (128, 100, 3)

    # Select index 1 (the Grayscale config): the channel axis is dropped.
    rand_choice.side_effect = lambda seq: seq[1]
    results = one_of(sample)
    assert results['img'].shape == (128, 100)

    # Passing an already-built transform object alongside a config dict.
    # rand_choice still returns the element at index 1 here.
    one_of = OneOfWrapper([ColorJitter(), gray_cfg])
    results = one_of(sample)
    assert results['img'].shape == (128, 100)

    # Test invalid inputs
    with pytest.raises(AssertionError):
        one_of = OneOfWrapper(None)
    with pytest.raises(AssertionError):
        one_of = OneOfWrapper([])
    with pytest.raises(AssertionError):
        one_of = OneOfWrapper({})
|
||||
|
||||
|
||||
@mock.patch('numpy.random.uniform')
def test_runwithprob(np_random_uniform):
    """Check RandomWrapper against two mocked uniform draws.

    With draws of 0.1 and 0.9 and a second constructor argument of 0.5, the
    first call must yield a single-channel image and the second must leave
    the three-channel image as it was.

    Args:
        np_random_uniform: Mock injected by ``mock.patch`` in place of
            ``numpy.random.uniform``.
    """
    np_random_uniform.side_effect = [0.1, 0.9]
    wrapper = RandomWrapper(
        [dict(type='TorchVisionWrapper', op='Grayscale')], 0.5)
    img = np.random.randint(0, 256, size=(128, 100, 3), dtype=np.uint8)

    for expected_shape in ((128, 100), (128, 100, 3)):
        # A fresh copy is passed on each call so the source image is reused.
        results = wrapper({'img': copy.deepcopy(img)})
        assert results['img'].shape == expected_shape
|
@ -1,42 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import unittest.mock as mock
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mmocr.datasets.pipelines.transforms as transforms
|
||||
|
||||
|
||||
@mock.patch('%s.transforms.np.random.random_sample' % __name__)
def test_scale_aspect_jitter(mock_random):
    """Check ScaleAspectJitter sampling with a mocked random_sample.

    Args:
        mock_random: Mock injected by ``mock.patch`` in place of
            ``np.random.random_sample`` as seen through ``transforms``.
    """
    jitter = transforms.ScaleAspectJitter(
        img_scale=[(3000, 1000)],  # unused
        ratio_range=(0.5, 1.5),
        aspect_ratio_range=(1, 1),
        multiscale_mode='value',
        long_size_bound=2000,
        short_size_bound=640,
        resize_type='long_short_bound',
        keep_ratio=False)
    mock_random.side_effect = [0.5]

    # test sample_from_range
    sampled = jitter.sample_from_range([100, 200])
    assert sampled == 150

    # test _random_scale
    results = {'img': np.zeros((4000, 1000))}
    mock_random.side_effect = [0.5, 1]
    jitter._random_scale(results)
    # The original author's note gives the expected scale as (w, h) - here
    # (650, 2600) for the 4000x1000 input.
    assert results['scale'] == (650, 2600)
|
@ -1,130 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
|
||||
import pytest
|
||||
from mmdet.datasets import DATASETS
|
||||
|
||||
from mmocr.datasets import UniformConcatDataset
|
||||
from mmocr.utils import list_from_file
|
||||
|
||||
|
||||
def test_uniform_concat_dataset_pipeline():
    """Check how UniformConcatDataset applies pipelines to its datasets.

    Covers a flat (1d) pipeline with ``force_apply=True``, no pipeline at
    all, and a nested (2d) pipeline with one entry per dataset group.
    """
    pipeline1 = [dict(type='LoadImageFromFile')]
    pipeline2 = [dict(type='LoadImageFromFile'), dict(type='ColorJitter')]

    img_prefix = 'tests/data/ocr_toy_dataset/imgs'
    ann_file = 'tests/data/ocr_toy_dataset/label.txt'
    parser_cfg = dict(
        type='LineStrParser',
        keys=['filename', 'text'],
        keys_idx=[0, 1],
        separator=' ')
    loader_cfg = dict(type='HardDiskLoader', repeat=1, parser=parser_cfg)
    train1 = dict(
        type='OCRDataset',
        img_prefix=img_prefix,
        ann_file=ann_file,
        loader=loader_cfg,
        pipeline=None,
        test_mode=False)

    # Shallow copy of train1 that only differs in its pipeline.
    train2 = dict(train1, pipeline=pipeline2)

    # pipeline is 1d list
    copy_train1 = copy.deepcopy(train1)
    copy_train2 = copy.deepcopy(train2)
    tmp_dataset = UniformConcatDataset(
        datasets=[copy_train1, copy_train2],
        pipeline=pipeline1,
        force_apply=True)

    assert len(tmp_dataset) == 2 * len(list_from_file(ann_file))
    first, second = tmp_dataset.datasets[0], tmp_dataset.datasets[1]
    assert len(first.pipeline.transforms) == len(second.pipeline.transforms)

    # pipeline is None
    copy_train2 = copy.deepcopy(train2)
    tmp_dataset = UniformConcatDataset(datasets=[copy_train2], pipeline=None)
    assert len(tmp_dataset.datasets[0].pipeline.transforms) == len(pipeline2)

    copy_train2 = copy.deepcopy(train2)
    tmp_dataset = UniformConcatDataset(
        datasets=[[copy_train2], [copy_train2]], pipeline=None)
    assert len(tmp_dataset.datasets[0].pipeline.transforms) == len(pipeline2)

    # pipeline is 2d list
    copy_train1 = copy.deepcopy(train1)
    copy_train2 = copy.deepcopy(train2)
    tmp_dataset = UniformConcatDataset(
        datasets=[[copy_train1], [copy_train2]],
        pipeline=[pipeline1, pipeline2])
    assert len(tmp_dataset.datasets[0].pipeline.transforms) == len(pipeline1)
|
||||
|
||||
|
||||
def test_uniform_concat_dataset_eval():
    """Check UniformConcatDataset.evaluate and its mean-score handling.

    Two dummy dataset classes are registered in ``DATASETS``; each one's
    ``evaluate`` returns ``{'n': res[0]}`` so the expected per-dataset and
    mean values can be stated directly.
    """

    @DATASETS.register_module()
    class DummyDataset:

        def __init__(self):
            self.CLASSES = 0
            self.ann_file = 'empty'

        def __len__(self):
            return 1

        def evaluate(self, res, logger, **kwargs):
            return dict(n=res[0])

    # Test 'auto'
    fake_inputs = [10]
    datasets = [dict(type='DummyDataset')]
    tmp_dataset = UniformConcatDataset(datasets)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['0_n'] == 10
    assert 'mean_n' not in results

    tmp_dataset = UniformConcatDataset(datasets, show_mean_scores=True)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['mean_n'] == 10

    fake_inputs = [10, 20]
    datasets = [dict(type='DummyDataset'), dict(type='DummyDataset')]
    tmp_dataset = UniformConcatDataset(datasets)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['0_n'] == 10
    assert results['1_n'] == 20
    assert results['mean_n'] == 15

    tmp_dataset = UniformConcatDataset(datasets, show_mean_scores=False)
    results = tmp_dataset.evaluate(fake_inputs)
    assert results['0_n'] == 10
    assert results['1_n'] == 20
    assert 'mean_n' not in results

    with pytest.raises(NotImplementedError):
        ds = UniformConcatDataset(datasets, separate_eval=False)
        ds.evaluate(fake_inputs)

    # A second dataset type, defined outside the raises block so that only
    # the mixed-type construction below is expected to raise.
    @DATASETS.register_module()
    class DummyDataset2:

        def __init__(self):
            self.CLASSES = 0
            self.ann_file = 'empty'

        def __len__(self):
            return 1

        def evaluate(self, res, logger, **kwargs):
            return dict(n=res[0])

    with pytest.raises(NotImplementedError):
        UniformConcatDataset(
            [dict(type='DummyDataset'),
             dict(type='DummyDataset2')],
            show_mean_scores=True)
|
@ -1,72 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import tempfile
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from mmocr.core.evaluation.hmean import (eval_hmean, get_gt_masks,
|
||||
output_ranklist)
|
||||
|
||||
|
||||
def _create_dummy_ann_infos():
    """Build a one-element list holding a dummy annotation dict.

    Returns:
        list[dict]: A single dict with float32 ``bboxes`` and
        ``bboxes_ignore`` arrays, an int64 ``labels`` array, and nested
        list polygons under ``masks`` and ``masks_ignore``.
    """
    bboxes = np.array([[50., 70., 80., 100.]], dtype=np.float32)
    labels = np.array([1], dtype=np.int64)
    bboxes_ignore = np.array([[120, 140, 200, 200]], dtype=np.float32)
    masks = [[[50, 70, 80, 70, 80, 100, 50, 100]]]
    masks_ignore = [[[120, 140, 200, 140, 200, 200, 120, 200]]]

    return [
        dict(
            bboxes=bboxes,
            labels=labels,
            bboxes_ignore=bboxes_ignore,
            masks=masks,
            masks_ignore=masks_ignore)
    ]
|
||||
|
||||
|
||||
def test_output_ranklist():
    """Check output_ranklist argument validation and its sorted output.

    The output path lives inside a temporary directory so that the written
    JSON file is removed when the test finishes.
    """
    import os.path as osp

    result = [{'hmean': 1}, {'hmean': 0.5}]
    img_infos = [{'file_name': 'sample1.jpg'}, {'file_name': 'sample2.jpg'}]

    with tempfile.TemporaryDirectory() as tmp_dir:
        # file_name deliberately has no '.json' suffix; it is used below as
        # the invalid output path.
        file_name = osp.join(tmp_dir, 'ranklist')
        json_file = file_name + '.json'

        with pytest.raises(AssertionError):
            output_ranklist([[]], img_infos, json_file)
        with pytest.raises(AssertionError):
            output_ranklist(result, [[]], json_file)
        with pytest.raises(AssertionError):
            output_ranklist(result, img_infos, file_name)

        sorted_outputs = output_ranklist(result, img_infos, json_file)

    assert sorted_outputs[0]['hmean'] == 0.5
|
||||
|
||||
|
||||
def test_get_gt_mask():
    """Check that get_gt_masks returns the dummy kept and ignored masks."""
    gt_masks, gt_masks_ignore = get_gt_masks(_create_dummy_ann_infos())

    expected_kept = [[50, 70, 80, 70, 80, 100, 50, 100]]
    expected_ignored = [[120, 140, 200, 140, 200, 200, 120, 200]]
    assert np.allclose(gt_masks[0], expected_kept)
    assert np.allclose(gt_masks_ignore[0], expected_ignored)
|
||||
|
||||
|
||||
def test_eval_hmean():
    """Check eval_hmean argument validation and a perfect-score evaluation."""
    metrics = {'hmean-iou', 'hmean-ic13'}
    results = [{
        'boundary_result': [[50, 70, 80, 70, 80, 100, 50, 100, 1],
                            [120, 140, 200, 140, 200, 200, 120, 200, 1]]
    }]
    img_infos = [{'file_name': 'sample1.jpg'}]
    ann_infos = _create_dummy_ann_infos()

    # test invalid arguments
    invalid_calls = [
        (results, [[]], ann_infos, metrics),
        (results, img_infos, [[]], metrics),
        ([[]], img_infos, ann_infos, metrics),
        (results, img_infos, ann_infos, 'hmean-iou'),
    ]
    for bad_results, bad_imgs, bad_anns, bad_metrics in invalid_calls:
        with pytest.raises(AssertionError):
            eval_hmean(bad_results, bad_imgs, bad_anns, metrics=bad_metrics)

    eval_results = eval_hmean(results, img_infos, ann_infos, metrics=metrics)

    for metric_key in ('hmean-iou:hmean', 'hmean-ic13:hmean'):
        assert eval_results[metric_key] == 1
|
@ -1,41 +0,0 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
"""Test hmean_iou."""
|
||||
import pytest
|
||||
|
||||
import mmocr.core.evaluation.hmean_iou as hmean_iou
|
||||
|
||||
|
||||
def test_eval_hmean_iou():
    """Check eval_hmean_iou argument validation and an exact-match case."""
    pred_boxes = []
    gt_boxes = []
    gt_ignored_boxes = []
    iou_thr = 0.5
    precision_thr = 0.5

    # test invalid arguments.
    invalid_arg_sets = [
        ([1], gt_boxes, gt_ignored_boxes, iou_thr, precision_thr),
        (pred_boxes, [1], gt_ignored_boxes, iou_thr, precision_thr),
        (pred_boxes, gt_boxes, [1], iou_thr, precision_thr),
        (pred_boxes, gt_boxes, gt_ignored_boxes, 1.1, precision_thr),
        (pred_boxes, gt_boxes, gt_ignored_boxes, iou_thr, 1.1),
    ]
    for bad_args in invalid_arg_sets:
        with pytest.raises(AssertionError):
            hmean_iou.eval_hmean_iou(*bad_args)

    # Predictions identical to the ground truth give perfect scores.
    pred_boxes = [[[0, 0, 1, 0, 1, 1, 0, 1], [2, 0, 3, 0, 3, 1, 2, 1]]]
    gt_boxes = [[[0, 0, 1, 0, 1, 1, 0, 1], [2, 0, 3, 0, 3, 1, 2, 1]]]
    gt_ignored_boxes = [[]]
    results = hmean_iou.eval_hmean_iou(pred_boxes, gt_boxes, gt_ignored_boxes,
                                       iou_thr, precision_thr)
    first_img_result = results[1][0]
    for score_name in ('recall', 'precision', 'hmean'):
        assert first_img_result[score_name] == 1
|
Loading…
x
Reference in New Issue
Block a user