mmocr/tests/test_dataset/test_loader.py

89 lines
3.0 KiB
Python
Raw Normal View History

# Copyright (c) OpenMMLab. All rights reserved.
2021-04-02 23:54:57 +08:00
import json
import os.path as osp
import tempfile
import pytest
from mmocr.datasets.utils.backend import (HardDiskAnnFileBackend,
HTTPAnnFileBackend,
PetrelAnnFileBackend)
from mmocr.datasets.utils.loader import (AnnFileLoader, HardDiskLoader,
LmdbLoader)
from mmocr.utils import lmdb_converter
2021-04-02 23:54:57 +08:00
def _create_dummy_line_str_file(ann_file):
ann_info1 = 'sample1.jpg hello'
ann_info2 = 'sample2.jpg world'
with open(ann_file, 'w') as fw:
for ann_info in [ann_info1, ann_info2]:
fw.write(ann_info + '\n')
def _create_dummy_line_json_file(ann_file):
ann_info1 = {'filename': 'sample1.jpg', 'text': 'hello'}
ann_info2 = {'filename': 'sample2.jpg', 'text': 'world'}
with open(ann_file, 'w') as fw:
for ann_info in [ann_info1, ann_info2]:
fw.write(json.dumps(ann_info) + '\n')
def test_loader():
    """Exercise the annotation loaders and the ann-file backends.

    Covers: argument validation of ``AnnFileLoader``; ``HardDiskLoader``
    with the line-string and line-JSON parsers; iteration over a loader
    until ``StopIteration``; ``LmdbLoader`` on a converted LMDB file; and
    rejection of ``file_format='json'`` by the three ann-file backends.
    """
    parser = dict(
        type='LineStrParser',
        keys=['filename', 'text'],
        keys_idx=[0, 1],
        separator=' ')

    # The context manager removes the directory even when an assertion
    # below fails, which a trailing ``cleanup()`` call would not.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        _create_dummy_line_str_file(ann_file)

        # invalid ``repeat`` value
        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, parser, repeat=0)

        # invalid ``parser`` argument (a list instead of a dict)
        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, [], repeat=1)

        # test text loader and line str parser
        text_loader = HardDiskLoader(ann_file, parser, repeat=1)
        assert len(text_loader) == 2
        assert text_loader.ori_data_infos[0] == 'sample1.jpg hello'
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test text loader and linedict parser
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = HardDiskLoader(ann_file, json_parser, repeat=1)
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test that iterating the loader eventually raises StopIteration
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = HardDiskLoader(ann_file, json_parser, repeat=1)
        it = iter(text_loader)
        with pytest.raises(StopIteration):
            for _ in range(len(text_loader) + 1):
                next(it)

        # test lmdb loader and line str parser
        _create_dummy_line_str_file(ann_file)
        lmdb_file = osp.join(tmp_dir, 'fake_data.lmdb')
        lmdb_converter(ann_file, lmdb_file, lmdb_map_size=102400)

        lmdb_loader = LmdbLoader(lmdb_file, parser, repeat=1)
        try:
            assert lmdb_loader[0] == {
                'filename': 'sample1.jpg',
                'text': 'hello'
            }
        finally:
            # Always close the loader before the temp directory is removed.
            lmdb_loader.close()

    # The backends do not touch the temp directory, so these checks can run
    # after it has been cleaned up.
    with pytest.raises(AssertionError):
        HardDiskAnnFileBackend(file_format='json')
    with pytest.raises(AssertionError):
        PetrelAnnFileBackend(file_format='json')
    with pytest.raises(AssertionError):
        HTTPAnnFileBackend(file_format='json')