From 20a87d476c201f5b7eff38aa52922b4c5dc9120e Mon Sep 17 00:00:00 2001
From: Tong Gao
Date: Thu, 9 Feb 2023 18:31:25 +0800
Subject: [PATCH] [Fix] Fix some inferencer bugs (#1706)

* [Fix] Fix some inferencer bugs

* fix
---
 configs/kie/sdmgr/_base_sdmgr_unet16.py           |  2 +-
 mmocr/apis/inferencers/base_mmocr_inferencer.py   |  3 ++-
 mmocr/apis/inferencers/kie_inferencer.py          | 13 +++++++++++--
 mmocr/ocr.py                                      |  5 +++--
 .../test_inferencers/test_textdet_inferencer.py   | 11 +++++++----
 .../test_inferencers/test_textrec_inferencer.py   | 11 +++++++----
 6 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/configs/kie/sdmgr/_base_sdmgr_unet16.py b/configs/kie/sdmgr/_base_sdmgr_unet16.py
index d20090d1..76aa631b 100644
--- a/configs/kie/sdmgr/_base_sdmgr_unet16.py
+++ b/configs/kie/sdmgr/_base_sdmgr_unet16.py
@@ -24,5 +24,5 @@ test_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadKIEAnnotations'),
     dict(type='Resize', scale=(1024, 512), keep_ratio=True),
-    dict(type='PackKIEInputs'),
+    dict(type='PackKIEInputs', meta_keys=('img_path', )),
 ]
diff --git a/mmocr/apis/inferencers/base_mmocr_inferencer.py b/mmocr/apis/inferencers/base_mmocr_inferencer.py
index e9ac5e09..69b8894d 100644
--- a/mmocr/apis/inferencers/base_mmocr_inferencer.py
+++ b/mmocr/apis/inferencers/base_mmocr_inferencer.py
@@ -80,6 +80,7 @@ class BaseMMOCRInferencer(BaseInferencer):
         Args:
             inputs (InputsType): Inputs for the inferencer. It can be a path
                 to image / image directory, or an array, or a list of these.
+                Note: If it's a numpy array, it should be in BGR order.
             return_datasamples (bool): Whether to return results as
                 :obj:`BaseDataElement`. Defaults to False.
             batch_size (int): Inference batch size. Defaults to 1.
@@ -206,7 +207,7 @@ class BaseMMOCRInferencer(BaseInferencer):
             img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
             img_name = osp.basename(single_input)
         elif isinstance(single_input, np.ndarray):
-            img = single_input.copy()
+            img = single_input.copy()[:, :, ::-1]  # to RGB
             img_num = str(self.num_visualized_imgs).zfill(8)
             img_name = f'{img_num}.jpg'
         else:
diff --git a/mmocr/apis/inferencers/kie_inferencer.py b/mmocr/apis/inferencers/kie_inferencer.py
index aee207c1..7944798e 100644
--- a/mmocr/apis/inferencers/kie_inferencer.py
+++ b/mmocr/apis/inferencers/kie_inferencer.py
@@ -77,6 +77,13 @@ class KIEInferencer(BaseMMOCRInferencer):
         self.novisual = all(
             self._get_transform_idx(pipeline_cfg, t) == -1
             for t in self.loading_transforms)
+        # Remove Resize from test_pipeline, since SDMGR requires bbox
+        # annotations to be resized together with pictures, but visualization
+        # loads the original image from the disk.
+        # TODO: find a more elegant way to fix this
+        idx = self._get_transform_idx(pipeline_cfg, 'Resize')
+        if idx != -1:
+            pipeline_cfg.pop(idx)
         # If it's in non-visual mode, self.pipeline will be specified.
         # Otherwise, file_pipeline and ndarray_pipeline will be specified.
         if self.novisual:
@@ -93,6 +100,7 @@ class KIEInferencer(BaseMMOCRInferencer):
         - img (str or ndarray): Path to the image or the image itself. If
           KIE Inferencer is used in no-visual mode, this key is not
           required.
+          Note: If it's a numpy array, it should be in BGR order.
         - img_shape (tuple(int, int)): Image shape in (H, W). In
         - instances (list[dict]): A list of instances.
           - bbox (ndarray(dtype=np.float32)): Shape (4, ). Bounding box.
@@ -182,6 +190,7 @@ class KIEInferencer(BaseMMOCRInferencer):
         - img (str or ndarray): Path to the image or the image itself. If
           KIE Inferencer is used in no-visual mode, this key is not
           required.
+          Note: If it's a numpy array, it should be in BGR order.
         - img_shape (tuple(int, int)): Image shape in (H, W). In
         - instances (list[dict]): A list of instances.
           - bbox (ndarray(dtype=np.float32)): Shape (4, ). Bounding box.
@@ -286,10 +295,10 @@ class KIEInferencer(BaseMMOCRInferencer):
             assert 'img' in single_input or 'img_shape' in single_input
             if 'img' in single_input:
                 if isinstance(single_input['img'], str):
-                    img = mmcv.imread(single_input['img'])
+                    img = mmcv.imread(single_input['img'], channel_order='rgb')
                     img_name = osp.basename(single_input['img'])
                 elif isinstance(single_input['img'], np.ndarray):
-                    img = single_input['img'].copy()
+                    img = single_input['img'].copy()[:, :, ::-1]  # To RGB
                     img_name = f'{img_num}.jpg'
             elif 'img_shape' in single_input:
                 img = np.zeros(single_input['img_shape'], dtype=np.uint8)
diff --git a/mmocr/ocr.py b/mmocr/ocr.py
index b96e1342..216f9a41 100755
--- a/mmocr/ocr.py
+++ b/mmocr/ocr.py
@@ -46,7 +46,7 @@ def parse_args():
         help='Pretrained key information extraction algorithm. It\'s the path'
         'to the config file or the model name defined in metafile.')
     parser.add_argument(
-        '--kie-ckpt',
+        '--kie-weights',
         type=str,
         default=None,
         help='Path to the custom checkpoint file of the selected kie model. '
@@ -77,7 +77,8 @@ def parse_args():
     call_args = vars(parser.parse_args())
 
     init_kws = [
-        'det', 'det_weights', 'rec', 'rec_weights', 'kie', 'kie_ckpt', 'device'
+        'det', 'det_weights', 'rec', 'rec_weights', 'kie', 'kie_weights',
+        'device'
     ]
     init_args = {}
     for init_kw in init_kws:
diff --git a/tests/test_apis/test_inferencers/test_textdet_inferencer.py b/tests/test_apis/test_inferencers/test_textdet_inferencer.py
index ef6fe522..60130955 100644
--- a/tests/test_apis/test_inferencers/test_textdet_inferencer.py
+++ b/tests/test_apis/test_inferencers/test_textdet_inferencer.py
@@ -54,8 +54,9 @@ class TestTextDetinferencer(TestCase):
         res_ndarray = self.inferencer(img, return_vis=True)
         self.assert_predictions_equal(res_path['predictions'],
                                       res_ndarray['predictions'])
-        self.assertIn('visualization', res_path)
-        self.assertIn('visualization', res_ndarray)
+        self.assertTrue(
+            np.allclose(res_path['visualization'],
+                        res_ndarray['visualization']))
 
         # multiple images
         img_paths = [
@@ -68,8 +69,10 @@ class TestTextDetinferencer(TestCase):
         res_ndarray = self.inferencer(imgs, return_vis=True)
         self.assert_predictions_equal(res_path['predictions'],
                                       res_ndarray['predictions'])
-        self.assertIn('visualization', res_path)
-        self.assertIn('visualization', res_ndarray)
+        for i in range(len(img_paths)):
+            self.assertTrue(
+                np.allclose(res_path['visualization'][i],
+                            res_ndarray['visualization'][i]))
 
         # img dir, test different batch sizes
         img_dir = 'tests/data/det_toy_dataset/imgs/test/'
diff --git a/tests/test_apis/test_inferencers/test_textrec_inferencer.py b/tests/test_apis/test_inferencers/test_textrec_inferencer.py
index 7f608620..801110cf 100644
--- a/tests/test_apis/test_inferencers/test_textrec_inferencer.py
+++ b/tests/test_apis/test_inferencers/test_textrec_inferencer.py
@@ -52,8 +52,9 @@ class TestTextRecinferencer(TestCase):
         res_ndarray = self.inferencer(img, return_vis=True)
         self.assert_predictions_equal(res_path['predictions'],
                                       res_ndarray['predictions'])
-        self.assertIn('visualization', res_path)
-        self.assertIn('visualization', res_ndarray)
+        self.assertTrue(
+            np.allclose(res_path['visualization'],
+                        res_ndarray['visualization']))
 
         # multiple images
         img_paths = [
@@ -66,8 +67,10 @@ class TestTextRecinferencer(TestCase):
         res_ndarray = self.inferencer(imgs, return_vis=True)
         self.assert_predictions_equal(res_path['predictions'],
                                       res_ndarray['predictions'])
-        self.assertIn('visualization', res_path)
-        self.assertIn('visualization', res_ndarray)
+        for i in range(len(img_paths)):
+            self.assertTrue(
+                np.allclose(res_path['visualization'][i],
+                            res_ndarray['visualization'][i]))
 
         # img dir, test different batch sizes
         img_dir = 'tests/data/rec_toy_dataset/imgs'
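
Usage note (illustrative sketch, not part of the patch): the snippet below mirrors
the updated tests to show the ndarray input contract after this change. The import
path, model name, and image path are assumptions for demonstration only; substitute
whatever detector config and image you have.

    import mmcv
    import numpy as np

    from mmocr.apis.inferencers import TextDetInferencer

    # Hypothetical config name; any valid detector config or metafile name
    # accepted by TextDetInferencer works here.
    inferencer = TextDetInferencer(model='dbnet_resnet18_fpnc_1200e_icdar2015')

    img_path = 'demo/demo_text_det.jpg'  # illustrative path
    # mmcv.imread returns a BGR array by default. After this patch, ndarray
    # inputs are expected in BGR order and are converted to RGB internally
    # before visualization, so both calls below should draw the same result.
    img_bgr = mmcv.imread(img_path)

    res_path = inferencer(img_path, return_vis=True)
    res_ndarray = inferencer(img_bgr, return_vis=True)
    assert np.allclose(res_path['visualization'], res_ndarray['visualization'])

Separately, scripts that invoke mmocr/ocr.py with the old --kie-ckpt flag need to
pass --kie-weights instead.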