mmocr/tests/test_utils/test_box.py
GT 9582f93c70
Support ocr box stitching (#290)
* ocr box stitching

* rename a varible

* add a demo

* move functions to box_util
add test
2021-06-19 19:28:14 +08:00

48 lines
1.6 KiB
Python

from mmocr.utils import is_on_same_line, stitch_boxes_into_lines
def test_box_on_line():
# regular boxes
box1 = [0, 0, 1, 0, 1, 1, 0, 1]
box2 = [2, 0.5, 3, 0.5, 3, 1.5, 2, 1.5]
box3 = [4, 0.8, 5, 0.8, 5, 1.8, 4, 1.8]
assert is_on_same_line(box1, box2, 0.5)
assert not is_on_same_line(box1, box3, 0.5)
# irregular box4
box4 = [0, 0, 1, 1, 1, 2, 0, 1]
box5 = [2, 1.5, 3, 1.5, 3, 2.5, 2, 2.5]
box6 = [2, 1.6, 3, 1.6, 3, 2.6, 2, 2.6]
assert is_on_same_line(box4, box5, 0.5)
assert not is_on_same_line(box4, box6, 0.5)
def test_stitch_boxes_into_lines():
boxes = [ # regular boxes
[0, 0, 1, 0, 1, 1, 0, 1],
[2, 0.5, 3, 0.5, 3, 1.5, 2, 1.5],
[3, 1.2, 4, 1.2, 4, 2.2, 3, 2.2],
[5, 0.5, 6, 0.5, 6, 1.5, 5, 1.5],
# irregular box
[6, 1.5, 7, 1.25, 7, 1.75, 6, 1.75]
]
raw_input = [{'box': boxes[i], 'text': str(i)} for i in range(len(boxes))]
result = stitch_boxes_into_lines(raw_input, 1, 0.5)
# Final lines: [0, 1], [2], [3, 4]
# box 0, 1, 3, 4 are on the same line but box 3 is 2 pixels away from box 1
# box 3 and 4 are on the same line since the length of overlapping part >=
# 0.5 * the y-axis length of box 5
expected_result = [{
'box': [0, 0, 3, 0, 3, 1.5, 0, 1.5],
'text': '0 1'
}, {
'box': [3, 1.2, 4, 1.2, 4, 2.2, 3, 2.2],
'text': '2'
}, {
'box': [5, 0.5, 7, 0.5, 7, 1.75, 5, 1.75],
'text': '3 4'
}]
result.sort(key=lambda x: x['box'][0])
expected_result.sort(key=lambda x: x['box'][0])
assert result == expected_result