mmocr/tests/test_dataset/test_loader.py
Hongbin Sun 402e8f1162
[Feature] Support loading annotation file with petrel/http backend (#793)
* add petrel backend for ann_file loading

* support icdar dataset

* add http backend

* name back

* fix pytest

* support load lmdb file on ceph

* refactor

* add close for lmdb

* add log and warning msg

* rename

* typo

* update

* fix
2022-03-07 19:18:02 +08:00

89 lines
3.0 KiB
Python

# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
import tempfile
import pytest
from mmocr.datasets.utils.backend import (HardDiskAnnFileBackend,
HTTPAnnFileBackend,
PetrelAnnFileBackend)
from mmocr.datasets.utils.loader import (AnnFileLoader, HardDiskLoader,
LmdbLoader)
from mmocr.utils import lmdb_converter
def _create_dummy_line_str_file(ann_file):
    """Write a two-record, space-separated annotation file.

    Each record is one line of the form ``<filename> <text>`` terminated by
    a newline. Any existing content at ``ann_file`` is overwritten.

    Args:
        ann_file (str): Path of the annotation file to create.
    """
    records = ('sample1.jpg hello', 'sample2.jpg world')
    # Build the whole payload first so the file is written in a single call.
    payload = ''.join(record + '\n' for record in records)
    with open(ann_file, 'w') as stream:
        stream.write(payload)
def _create_dummy_line_json_file(ann_file):
    """Write a two-record JSON-lines annotation file.

    Each line is a JSON object with the keys ``filename`` and ``text``,
    terminated by a newline. Any existing content at ``ann_file`` is
    overwritten.

    Args:
        ann_file (str): Path of the annotation file to create.
    """
    samples = (('sample1.jpg', 'hello'), ('sample2.jpg', 'world'))
    # Serialize up front; key order (filename, text) fixes the output text.
    serialized = [
        json.dumps({'filename': filename, 'text': text})
        for filename, text in samples
    ]
    with open(ann_file, 'w') as stream:
        stream.write('\n'.join(serialized) + '\n')
def test_loader():
    """Exercise the annotation loaders and annotation-file backends.

    Checks, in order:

    * ``AnnFileLoader`` raises ``AssertionError`` for ``repeat=0`` and for a
      parser given as an empty list.
    * ``HardDiskLoader`` with a ``LineStrParser`` and with a
      ``LineJsonParser`` yields the expected parsed records.
    * Advancing a ``HardDiskLoader`` iterator one step past its length
      raises ``StopIteration``.
    * ``LmdbLoader`` reads the expected record from an LMDB file produced by
      ``lmdb_converter``.
    * Each annotation-file backend raises ``AssertionError`` when constructed
      with ``file_format='json'``.
    """
    # Use the context-manager form so the temporary directory is removed even
    # when an assertion below fails; a trailing ``cleanup()`` call would be
    # skipped on failure and leave the files behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        _create_dummy_line_str_file(ann_file)

        parser = dict(
            type='LineStrParser',
            keys=['filename', 'text'],
            keys_idx=[0, 1],
            separator=' ')

        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, parser, repeat=0)
        with pytest.raises(AssertionError):
            AnnFileLoader(ann_file, [], repeat=1)

        # test text loader and line str parser
        text_loader = HardDiskLoader(ann_file, parser, repeat=1)
        assert len(text_loader) == 2
        assert text_loader.ori_data_infos[0] == 'sample1.jpg hello'
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test text loader and line json parser
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = HardDiskLoader(ann_file, json_parser, repeat=1)
        assert text_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}

        # test that iterating past the end of the loader raises StopIteration
        _create_dummy_line_json_file(ann_file)
        json_parser = dict(type='LineJsonParser', keys=['filename', 'text'])
        text_loader = HardDiskLoader(ann_file, json_parser, repeat=1)
        it = iter(text_loader)
        with pytest.raises(StopIteration):
            for _ in range(len(text_loader) + 1):
                next(it)

        # test lmdb loader and line str parser
        _create_dummy_line_str_file(ann_file)
        lmdb_file = osp.join(tmp_dir, 'fake_data.lmdb')
        lmdb_converter(ann_file, lmdb_file, lmdb_map_size=102400)
        lmdb_loader = LmdbLoader(lmdb_file, parser, repeat=1)
        try:
            assert lmdb_loader[0] == {
                'filename': 'sample1.jpg',
                'text': 'hello'
            }
        finally:
            # Always close the loader, and do so before the temporary
            # directory holding the LMDB file is removed.
            lmdb_loader.close()

        # constructing a backend with file_format='json' must fail
        with pytest.raises(AssertionError):
            HardDiskAnnFileBackend(file_format='json')
        with pytest.raises(AssertionError):
            PetrelAnnFileBackend(file_format='json')
        with pytest.raises(AssertionError):
            HTTPAnnFileBackend(file_format='json')