fix #40: add unit test for recog config, transforms, etc. (#44)

2025-06-03 21:54:47 +08:00 · 2021-04-06 15:17:20 +08:00 · 2021-04-06 15:17:20 +08:00 · fbb5c8cda1
commit fbb5c8cda1
parent c8793ac141
7 changed files with 335 additions and 14 deletions
--- a/mmocr/models/textrecog/recognizer/crnn.py
+++ b/mmocr/models/textrecog/recognizer/crnn.py
@ -1,6 +1,3 @@
-import torch
-import torch.nn.functional as F
-
 from mmdet.models.builder import DETECTORS
 from .encode_decode_recognizer import EncodeDecodeRecognizer

@ -8,11 +5,3 @@ from .encode_decode_recognizer import EncodeDecodeRecognizer
@DETECTORS.register_module()
 class CRNNNet(EncodeDecodeRecognizer):
    """CTC-loss based recognizer."""
-
-    def forward_conversion(self, params, img):
-        x = self.extract_feat(img)
-        x = self.encoder(x)
-        outs = self.decoder(x)
-        outs = F.softmax(outs, dim=2)
-        params = torch.pow(params, 1)
-        return outs, params
--- a/tests/test_dataset/test_kie_dataset.py
+++ b/tests/test_dataset/test_kie_dataset.py
@ -0,0 +1,114 @@
+import json
+import math
+import os.path as osp
+import tempfile
+
+import pytest
+import torch
+
+from mmocr.datasets.kie_dataset import KIEDataset
+
+
+def _create_dummy_ann_file(ann_file):
+    """Write a one-record line-JSON annotation file and return the record.
+
+    Args:
+        ann_file (str): Path of the annotation file to create.
+
+    Returns:
+        dict: The written record, describing a 200x200 image named
+            'sample1.png' with five text boxes that all carry label 1.
+    """
+    # (text, box) pairs; each box is a flat list of eight numbers.
+    text_boxes = [
+        ('store', [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]),
+        ('address', [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0]),
+        ('price', [33.0, 2.0, 43.0, 2.0, 36.0, 12.0, 25.0, 12.0]),
+        ('1.0', [46.0, 2.0, 61.0, 2.0, 53.0, 12.0, 39.0, 12.0]),
+        ('google', [61.0, 2.0, 69.0, 2.0, 63.0, 12.0, 55.0, 12.0]),
+    ]
+
+    annotations = []
+    for text, box in text_boxes:
+        annotations.append({'text': text, 'box': box, 'label': 1})
+
+    ann_info1 = {
+        'file_name': 'sample1.png',
+        'height': 200,
+        'width': 200,
+        'annotations': annotations,
+    }
+
+    # One JSON document per line.
+    with open(ann_file, 'w') as fw:
+        fw.write(json.dumps(ann_info1) + '\n')
+
+    return ann_info1
+
+
+def _create_dummy_dict_file(dict_file):
+    """Write '0123' to ``dict_file`` one character per line; return it."""
+    dict_str = '0123'
+    with open(dict_file, 'w') as fw:
+        fw.writelines(char + '\n' for char in dict_str)
+
+    return dict_str
+
+
+def _create_dummy_loader():
+    """Return a 'HardDiskLoader' config that uses a 'LineJsonParser'."""
+    parser_cfg = dict(
+        type='LineJsonParser',
+        keys=['file_name', 'height', 'width', 'annotations'])
+    loader_cfg = dict(type='HardDiskLoader', repeat=1, parser=parser_cfg)
+
+    return loader_cfg
+
+
+def test_kie_dataset():
+    """Check KIEDataset init, pre_pipeline, _parse_anno_info and evaluate."""
+    # The context manager removes the temp dir even if construction fails.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # create dummy data
+        ann_file = osp.join(tmp_dir, 'fake_data.txt')
+        ann_info1 = _create_dummy_ann_file(ann_file)
+
+        dict_file = osp.join(tmp_dir, 'fake_dict.txt')
+        _create_dummy_dict_file(dict_file)
+
+        # test initialization
+        loader = _create_dummy_loader()
+        dataset = KIEDataset(ann_file, loader, dict_file, pipeline=[])
+
+    # test pre_pipeline
+    img_info = dataset.data_infos[0]
+    results = dict(img_info=img_info)
+    dataset.pre_pipeline(results)
+    assert results['img_prefix'] == dataset.img_prefix
+
+    # test _parse_anno_info
+    annos = ann_info1['annotations']
+    with pytest.raises(AssertionError):
+        dataset._parse_anno_info(annos[0])
+    tmp_annos = [{
+        'text': 'store',
+        'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]
+    }]
+    with pytest.raises(AssertionError):
+        dataset._parse_anno_info(tmp_annos)
+
+    return_anno = dataset._parse_anno_info(annos)
+    assert 'bboxes' in return_anno
+    assert 'relations' in return_anno
+    assert 'texts' in return_anno
+    assert 'labels' in return_anno
+
+    # test evaluation
+    result = {}
+    result['nodes'] = torch.full((5, 5), 1, dtype=torch.float)
+    # Column 1 holds the largest value in every row of the dummy output.
+    result['nodes'][:, 1] = 100.
+    results = [result for _ in range(5)]
+
+    eval_res = dataset.evaluate(results)
+    assert math.isclose(eval_res['macro_f1'], 0.2, abs_tol=1e-4)
--- a/tests/test_dataset/test_ocr_transforms.py
+++ b/tests/test_dataset/test_ocr_transforms.py
@ -4,6 +4,7 @@ import unittest.mock as mock
 import numpy as np
 import torch
 import torchvision.transforms.functional as TF
+from PIL import Image

 import mmocr.datasets.pipelines.ocr_transforms as transforms

@ -92,3 +93,48 @@ def test_online_crop(mock_random):

    results = rci(results)
    assert np.allclose(results['img'].shape, [100, 100, 3])
+
+
+def test_fancy_pca():
+    """FancyPCA keeps the (3, 32, 100) shape of a random input tensor."""
+    input_tensor = torch.rand(3, 32, 100)
+    fancy_pca = transforms.FancyPCA()
+
+    output = fancy_pca({'img': input_tensor})
+    expected_shape = torch.Size([3, 32, 100])
+
+    assert output['img'].shape == expected_shape
+
+
+@mock.patch('%s.transforms.np.random.uniform' % __name__)
+def test_random_padding(mock_random):
+    """Check the padded ``img_shape`` with ``np.random.uniform`` mocked."""
+    kwargs = dict(max_ratio=[0.0, 0.0, 0.0, 0.0], box_type=None)
+
+    mock_random.side_effect = [1, 1, 1, 1]
+
+    src_img = np.ones((32, 100, 3), dtype=np.uint8)
+    results = {'img': src_img, 'img_shape': (32, 100, 3)}
+
+    rci = transforms.RandomPaddingOCR(**kwargs)
+
+    results = rci(results)
+    assert np.allclose(results['img_shape'], [96, 300, 3])
+
+
+def test_opencv2pil():
+    """A (32, 100, 3) array converts to an image whose size is (100, 32)."""
+    to_pil = transforms.OpencvToPil()
+    array_img = np.ones((32, 100, 3), dtype=np.uint8)
+
+    converted = to_pil({'img': array_img})
+    assert np.allclose(converted['img'].size, (100, 32))
+
+
+def test_pil2opencv():
+    """A 100x32 RGB PIL image converts to an array of shape (32, 100, 3)."""
+    to_array = transforms.PilToOpencv()
+    pil_img = Image.new('RGB', (100, 32), color=(255, 255, 255))
+
+    converted = to_array({'img': pil_img})
+    assert np.allclose(converted['img'].shape, (32, 100, 3))
--- a/tests/test_models/test_ocr_head.py
+++ b/tests/test_models/test_ocr_head.py
@ -4,13 +4,13 @@ import torch
 from mmocr.models.textrecog import SegHead


-def test_cafcn_head():
+def test_seg_head():
    with pytest.raises(AssertionError):
        SegHead(num_classes='100')
    with pytest.raises(AssertionError):
        SegHead(num_classes=-1)

-    cafcn_head = SegHead(num_classes=37)
+    seg_head = SegHead(num_classes=37)
    out_neck = (torch.rand(1, 128, 32, 32), )
-    out_head = cafcn_head(out_neck)
+    out_head = seg_head(out_neck)
    assert out_head.shape == torch.Size([1, 37, 32, 32])
--- a/tests/test_models/test_ocr_loss.py
+++ b/tests/test_models/test_ocr_loss.py
@ -6,6 +6,14 @@ from mmocr.models.textrecog.losses import CELoss, CTCLoss, SARLoss, TFLoss


 def test_ctc_loss():
+    with pytest.raises(AssertionError):
+        CTCLoss(flatten='flatten')
+    with pytest.raises(AssertionError):
+        CTCLoss(blank=None)
+    with pytest.raises(AssertionError):
+        CTCLoss(reduction=1)
+    with pytest.raises(AssertionError):
+        CTCLoss(zero_infinity='zero')
    # test CTCLoss
    ctc_loss = CTCLoss()
    outputs = torch.zeros(2, 40, 37)
--- a/tests/test_models/test_ocr_neck.py
+++ b/tests/test_models/test_ocr_neck.py
@ -0,0 +1,17 @@
+import torch
+
+from mmocr.models.textrecog.necks import FPNOCR
+
+
+def test_fpn_ocr():
+    """FPNOCR's first output for four levels has shape (1, 256, 32, 256)."""
+    level_shapes = [(1, 128, 32, 256), (1, 256, 16, 128), (1, 512, 8, 64),
+                    (1, 512, 4, 32)]
+    feats = tuple(torch.rand(*shape) for shape in level_shapes)
+
+    neck = FPNOCR(in_channels=[128, 256, 512, 512], out_channels=256)
+    neck.init_weights()
+    neck.train()
+
+    outputs = neck(feats)
+    assert outputs[0].shape == torch.Size([1, 256, 32, 256])
--- a/tests/test_models/test_recog_config.py
+++ b/tests/test_models/test_recog_config.py
@ -0,0 +1,147 @@
+import copy
+from os.path import dirname, exists, join
+
+import numpy as np
+import pytest
+import torch
+
+
+def _demo_mm_inputs(num_kernels=0, input_shape=(1, 3, 300, 300),
+                    num_items=None):  # yapf: disable
+    """Create dummy images and image metas for a forward pass.
+
+    Args:
+        num_kernels (int): Accepted but not used by this helper.
+        input_shape (tuple): Input batch dimensions (N, C, H, W).
+        num_items (None | list[int]): Number of boxes for each batch
+            item. Accepted but not used by this helper.
+
+    Returns:
+        dict: ``imgs`` is a float tensor of shape ``input_shape`` that
+            requires grad; ``img_metas`` is a list of N meta dicts.
+    """
+    (N, C, H, W) = input_shape
+    hwc_shape = (H, W, C)
+
+    # A fixed seed keeps the dummy images identical between runs.
+    imgs = np.random.RandomState(0).rand(*input_shape)
+
+    img_metas = []
+    for _ in range(N):
+        img_metas.append(
+            dict(img_shape=hwc_shape, ori_shape=hwc_shape,
+                 pad_shape=hwc_shape, filename='<demo>.png',
+                 text='hello', valid_ratio=1.0))
+
+    return {
+        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
+        'img_metas': img_metas,
+    }
+
+
+def _demo_gt_kernel_inputs(num_kernels=3, input_shape=(1, 3, 300, 300),
+                           num_items=None):  # yapf: disable
+    """Create random ground-truth kernel masks for a dummy batch.
+
+    Args:
+        num_kernels (int): Number of random kernels per batch item.
+        input_shape (tuple): Input batch dimensions (N, C, H, W).
+        num_items (None | list[int]): Number of boxes for each batch
+            item. Accepted but not used by this helper.
+
+    Returns:
+        list[BitmapMasks]: One mask container per batch item, each
+            built from ``num_kernels`` random arrays of shape (H, W).
+    """
+    from mmdet.core import BitmapMasks
+
+    (N, _, H, W) = input_shape
+    gt_kernels = []
+    for _ in range(N):
+        kernels = [np.random.rand(H, W) for _ in range(num_kernels)]
+        gt_kernels.append(BitmapMasks(kernels, H, W))
+
+    return gt_kernels
+
+
+def _get_config_directory():
+    """Return the ``configs`` directory; raise FileNotFoundError if absent."""
+    try:
+        # Source checkout: the repo root is three dirname() steps up.
+        repo_dpath = dirname(dirname(dirname(__file__)))
+    except NameError:
+        # __file__ is undefined (e.g. IPython): locate the repo via mmocr.
+        import mmocr
+        repo_dpath = dirname(dirname(mmocr.__file__))
+    config_dpath = join(repo_dpath, 'configs')
+    if not exists(config_dpath):
+        raise FileNotFoundError('Cannot find config path: ' + config_dpath)
+    return config_dpath
+
+
+def _get_config_module(fname):
+    """Load the config file ``fname`` found under the config directory.
+
+    Returns whatever ``mmcv.Config.fromfile`` produces for that path.
+    """
+    from mmcv import Config
+    return Config.fromfile(join(_get_config_directory(), fname))
+
+
+def _get_detector_cfg(fname):
+    """Return the ``model`` section of the config file ``fname``.
+
+    The section is deep copied so that a test can modify its parameters
+    without influencing other tests.
+    """
+    model_section = _get_config_module(fname).model
+    # Hand back a private copy, never the loaded config's own object.
+    return copy.deepcopy(model_section)
+
+
+@pytest.mark.parametrize('cfg_file', [
+    'textrecog/sar/sar_r31_parallel_decoder_academic.py',
+    'textrecog/crnn/crnn_academic_dataset.py',
+    'textrecog/nrtr/nrtr_r31_academic.py',
+    'textrecog/robust_scanner/robustscanner_r31_academic.py',
+    'textrecog/seg/seg_r31_1by16_fpnocr_academic.py'
+])
+def test_encoder_decoder_pipeline(cfg_file):
+    """Build the recognizer of each config and smoke-test its entry points.
+
+    Runs the training forward, the ``return_loss=False`` forward and
+    ``show_result`` on dummy inputs.
+    """
+    model_cfg = _get_detector_cfg(cfg_file)
+    model_cfg['pretrained'] = None
+
+    from mmocr.models import build_detector
+    detector = build_detector(model_cfg)
+
+    # The crnn config gets a 1-channel input, every other config 3 channels.
+    channels = 1 if 'crnn' in cfg_file else 3
+    input_shape = (1, channels, 32, 160)
+    mm_inputs = _demo_mm_inputs(0, input_shape)
+    imgs = mm_inputs.pop('imgs')
+    img_metas = mm_inputs.pop('img_metas')
+
+    # Test forward train
+    # gt_kernels is generated and passed only for the seg config.
+    train_kwargs = {}
+    if 'seg' in cfg_file:
+        train_kwargs['gt_kernels'] = _demo_gt_kernel_inputs(3, input_shape)
+    losses = detector.forward(imgs, img_metas, **train_kwargs)
+    assert isinstance(losses, dict)
+
+    # Test forward test
+    # Feed the batch one image at a time, each paired with its own meta.
+    with torch.no_grad():
+        batch_results = [
+            detector.forward([img[None, :]], [[meta]], return_loss=False)
+            for img, meta in zip(imgs, img_metas)
+        ]
+
+    # Test show_result
+    prediction = {'text': 'hello', 'score': 1.0}
+    canvas = np.random.rand(5, 5, 3)
+    detector.show_result(canvas, prediction)