mmocr/tests/test_models/test_ner_model.py

# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os.path as osp
import tempfile

import pytest
import torch

from mmocr.models import build_detector


def _create_dummy_vocab_file(vocab_file):
    """Write a dummy vocabulary of 'a' through 'z', one character per line.

    Args:
        vocab_file: Path of the file to write. It is opened in ``'w'`` mode,
            so any existing content is replaced.
    """
    first_code, last_code = ord('a'), ord('z')
    vocab_lines = [
        chr(code) + '\n' for code in range(first_code, last_code + 1)
    ]
    with open(vocab_file, 'w') as fw:
        fw.writelines(vocab_lines)


def _get_config_module(fname):
    """Load the configuration file ``fname`` through mmcv.

    Args:
        fname: Path of the config file, passed to ``Config.fromfile``.

    Returns:
        The object returned by ``Config.fromfile(fname)``.
    """
    # mmcv is imported here, at call time, rather than at module import.
    from mmcv import Config

    return Config.fromfile(fname)


def _get_detector_cfg(fname):
    """Return a private copy of the ``model`` section of a config file.

    The section is deep copied, so a test may modify the returned object
    without affecting the loaded configuration or other tests.

    Args:
        fname: Path of the config file to load.

    Returns:
        A deep copy of the loaded config's ``model`` attribute.
    """
    loaded_cfg = _get_config_module(fname)
    return copy.deepcopy(loaded_cfg.model)


@pytest.mark.parametrize(
    'cfg_file', ['configs/ner/bert_softmax/bert_softmax_cluener_18e.py'])
def test_bert_softmax(cfg_file):
    """Smoke-test the NER model described by ``cfg_file``.

    The model is built twice: once with the loss configured in the file and
    once with the loss type switched to ``MaskedFocalLoss``. Each build must
    return a dict from a loss-mode forward pass. Finally an inference-mode
    forward pass (``return_loss=False``) is run on the last built model.

    Args:
        cfg_file: Path of the model config file under test.
    """
    # prepare data
    texts = ['中'] * 47
    img = [31] * 47
    labels = [31] * 128
    input_ids = [0] * 128
    attention_mask = [0] * 128
    token_type_ids = [0] * 128
    img_metas = {
        'texts': texts,
        'labels': torch.tensor(labels).unsqueeze(0),
        'img': img,
        'input_ids': torch.tensor(input_ids).unsqueeze(0),
        'attention_masks': torch.tensor(attention_mask).unsqueeze(0),
        'token_type_ids': torch.tensor(token_type_ids).unsqueeze(0)
    }

    # create dummy data
    # The context manager removes the temporary directory even if building
    # the model or one of the assertions below fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        vocab_file = osp.join(tmp_dir, 'fake_vocab.txt')
        _create_dummy_vocab_file(vocab_file)

        model = _get_detector_cfg(cfg_file)
        model['label_convertor']['vocab_file'] = vocab_file

        # Loss configured in the config file.
        detector = build_detector(model)
        losses = detector.forward(img, img_metas)
        assert isinstance(losses, dict)

        # Same model definition with the loss swapped out.
        model['loss']['type'] = 'MaskedFocalLoss'
        detector = build_detector(model)
        losses = detector.forward(img, img_metas)
        assert isinstance(losses, dict)

    # Test forward test
    # Only checks that inference runs without raising; the result itself is
    # not inspected.
    with torch.no_grad():
        detector.forward(None, img_metas, return_loss=False)
[Enhancement] Add copyright info (#439) * add copyright info 2021-08-17 17:39:30 +08:00			`# Copyright (c) OpenMMLab. All rights reserved.`
Ner task (#148) * update ner standard code format * add pytest * fix pre-commit * Annotate the dataset section * fix pre-commit for dataset * rm big files and add comments in dataset * rename configs for ner task * minor changes if metric * Note modification * fix pre-commit * detail modification * rm transform * rm magic number * fix warnings in pylint * fix pre-commit * correct help info * rename model files * rename err fixed * 428_tag * Adjust to more general pipline * update unit test rate * update * Unit test coverage over 90% and add Readme * modify details * fix precommit * update * fix pre-commit * update * update * update * update result * update readme * update baseline config * update config and small minor changes * minor changes in readme and etc. * back to original * update toy config * upload model and log * fix pytest * Modify the notes. * fix readme * Delete Chinese punctuation * add demo and fix some logic and naming problems * add To_tensor transformer for ner and load pretrained model in config * delete extra lines * split ner loss to MaskedCrossEntropyLoss and MaskedFocalLoss * update config * fix err * updata * modify noqa * update new model report * fix err in ner demo * Update ner_dataset.py * Update test_ner_dataset.py * Update ner_dataset.py * Update ner_transforms.py * rm toy config and data * add comment * add empty * fix conflict * fix precommit * fix pytest * fix pytest err * Update ner_dataset.py * change dataset name to cluener2020 * move the postprocess in metric to convertor * rm __init__ etc. * precommit * add discription in loss * add auto download * add http * update * remove some 'issert' * replace unsqueeze * update config * update doc and bert.py * update * update demo code Co-authored-by: weihuaqiang <weihuaqiang@sensetime.com> Co-authored-by: Hongbin Sun <hongbin306@gmail.com> 2021-05-18 11:33:51 +08:00			`import copy`
			`import os.path as osp`
			`import tempfile`

			`import pytest`
			`import torch`

			`from mmocr.models import build_detector`


			`def _create_dummy_vocab_file(vocab_file):`
			`with open(vocab_file, 'w') as fw:`
			`for char in list(map(chr, range(ord('a'), ord('z') + 1))):`
			`fw.write(char + '\n')`


			`def _get_config_module(fname):`
			`"""Load a configuration as a python module."""`
			`from mmcv import Config`
			`config_mod = Config.fromfile(fname)`
			`return config_mod`


			`def _get_detector_cfg(fname):`
			`"""Grab configs necessary to create a detector.`

			`These are deep copied to allow for safe modification of parameters without`
			`influencing other tests.`
			`"""`
			`config = _get_config_module(fname)`
			`model = copy.deepcopy(config.model)`
			`return model`


			`@pytest.mark.parametrize(`
			`'cfg_file', ['configs/ner/bert_softmax/bert_softmax_cluener_18e.py'])`
Hbsun/feature iss205 (#210) * fix #205: remove act2fn * fix pytest 2021-05-18 15:15:35 +08:00			`def test_bert_softmax(cfg_file):`
Ner task (#148) * update ner standard code format * add pytest * fix pre-commit * Annotate the dataset section * fix pre-commit for dataset * rm big files and add comments in dataset * rename configs for ner task * minor changes if metric * Note modification * fix pre-commit * detail modification * rm transform * rm magic number * fix warnings in pylint * fix pre-commit * correct help info * rename model files * rename err fixed * 428_tag * Adjust to more general pipline * update unit test rate * update * Unit test coverage over 90% and add Readme * modify details * fix precommit * update * fix pre-commit * update * update * update * update result * update readme * update baseline config * update config and small minor changes * minor changes in readme and etc. * back to original * update toy config * upload model and log * fix pytest * Modify the notes. * fix readme * Delete Chinese punctuation * add demo and fix some logic and naming problems * add To_tensor transformer for ner and load pretrained model in config * delete extra lines * split ner loss to MaskedCrossEntropyLoss and MaskedFocalLoss * update config * fix err * updata * modify noqa * update new model report * fix err in ner demo * Update ner_dataset.py * Update test_ner_dataset.py * Update ner_dataset.py * Update ner_transforms.py * rm toy config and data * add comment * add empty * fix conflict * fix precommit * fix pytest * fix pytest err * Update ner_dataset.py * change dataset name to cluener2020 * move the postprocess in metric to convertor * rm __init__ etc. * precommit * add discription in loss * add auto download * add http * update * remove some 'issert' * replace unsqueeze * update config * update doc and bert.py * update * update demo code Co-authored-by: weihuaqiang <weihuaqiang@sensetime.com> Co-authored-by: Hongbin Sun <hongbin306@gmail.com> 2021-05-18 11:33:51 +08:00			`# prepare data`
			`texts = ['中'] * 47`
			`img = [31] * 47`
			`labels = [31] * 128`
			`input_ids = [0] * 128`
			`attention_mask = [0] * 128`
			`token_type_ids = [0] * 128`
			`img_metas = {`
			`'texts': texts,`
			`'labels': torch.tensor(labels).unsqueeze(0),`
			`'img': img,`
			`'input_ids': torch.tensor(input_ids).unsqueeze(0),`
			`'attention_masks': torch.tensor(attention_mask).unsqueeze(0),`
			`'token_type_ids': torch.tensor(token_type_ids).unsqueeze(0)`
			`}`

			`# create dummy data`
			`tmp_dir = tempfile.TemporaryDirectory()`
			`vocab_file = osp.join(tmp_dir.name, 'fake_vocab.txt')`
			`_create_dummy_vocab_file(vocab_file)`

			`model = _get_detector_cfg(cfg_file)`
			`model['label_convertor']['vocab_file'] = vocab_file`

			`detector = build_detector(model)`
			`losses = detector.forward(img, img_metas)`
			`assert isinstance(losses, dict)`

			`model['loss']['type'] = 'MaskedFocalLoss'`
			`detector = build_detector(model)`
			`losses = detector.forward(img, img_metas)`
			`assert isinstance(losses, dict)`

			`tmp_dir.cleanup()`

			`# Test forward test`
			`with torch.no_grad():`
			`batch_results = []`
			`result = detector.forward(None, img_metas, return_loss=False)`
			`batch_results.append(result)`