mirror of
https://github.com/open-mmlab/mmocr.git
synced 2025-06-03 21:54:47 +08:00
* ocr box stitching * rename a variable * add a demo * move functions to box_util add test
48 lines
1.6 KiB
Python
48 lines
1.6 KiB
Python
from mmocr.utils import is_on_same_line, stitch_boxes_into_lines
|
|
|
|
|
|
def test_box_on_line():
    """Exercise ``is_on_same_line`` with regular and irregular boxes.

    Every box is a flat list of eight numbers, and each call passes ``0.5``
    as the third argument. For both groups one pair must be reported as
    being on the same line and one pair must not.
    """
    threshold = 0.5

    # Regular (axis-aligned) boxes.
    anchor = [0, 0, 1, 0, 1, 1, 0, 1]
    near_box = [2, 0.5, 3, 0.5, 3, 1.5, 2, 1.5]
    far_box = [4, 0.8, 5, 0.8, 5, 1.8, 4, 1.8]
    assert is_on_same_line(anchor, near_box, threshold)
    assert not is_on_same_line(anchor, far_box, threshold)

    # Irregular anchor box compared against two regular boxes that differ
    # only by a 0.1 shift in their odd-indexed coordinates.
    skewed_anchor = [0, 0, 1, 1, 1, 2, 0, 1]
    accepted_box = [2, 1.5, 3, 1.5, 3, 2.5, 2, 2.5]
    rejected_box = [2, 1.6, 3, 1.6, 3, 2.6, 2, 2.6]
    assert is_on_same_line(skewed_anchor, accepted_box, threshold)
    assert not is_on_same_line(skewed_anchor, rejected_box, threshold)
|
|
|
|
|
|
def test_stitch_boxes_into_lines():
    """Check that ``stitch_boxes_into_lines`` merges boxes into text lines.

    Five boxes (flat lists of eight numbers) are wrapped into dicts with
    ``'box'`` and ``'text'`` keys, where the text is the box index, and
    stitched with the arguments ``1`` and ``0.5``. Both the result and the
    expectation are sorted by the first box coordinate before comparison,
    so the order in which lines are returned does not matter.
    """
    boxes = [
        # regular boxes
        [0, 0, 1, 0, 1, 1, 0, 1],
        [2, 0.5, 3, 0.5, 3, 1.5, 2, 1.5],
        [3, 1.2, 4, 1.2, 4, 2.2, 3, 2.2],
        [5, 0.5, 6, 0.5, 6, 1.5, 5, 1.5],
        # irregular box
        [6, 1.5, 7, 1.25, 7, 1.75, 6, 1.75],
    ]
    raw_input = [{'box': box, 'text': str(i)} for i, box in enumerate(boxes)]
    result = stitch_boxes_into_lines(raw_input, 1, 0.5)
    # Final lines: [0, 1], [2], [3, 4]
    # box 0, 1, 3, 4 are on the same line but box 3 is 2 pixels away from box 1
    # box 3 and 4 are on the same line since the length of overlapping part >=
    # 0.5 * the y-axis length of box 4
    expected_result = [{
        'box': [0, 0, 3, 0, 3, 1.5, 0, 1.5],
        'text': '0 1'
    }, {
        'box': [3, 1.2, 4, 1.2, 4, 2.2, 3, 2.2],
        'text': '2'
    }, {
        'box': [5, 0.5, 7, 0.5, 7, 1.75, 5, 1.75],
        'text': '3 4'
    }]
    # Sort both sides by the first coordinate of each box so the comparison
    # is independent of the order in which lines are produced.
    result.sort(key=lambda x: x['box'][0])
    expected_result.sort(key=lambda x: x['box'][0])
    assert result == expected_result
|