Qing Jiang d8549ade85
[Feature] Support loading images and labels from lmdb file (#982)
* loss

* fix

* add LoadImageFromLMDB

* update

* new version

* add json

* fix bug

* Compatible with older versions

* modify test_loader

* add docstring and update test_loader

* fix a bug

* fix docstring and update doc

* update

* Update mmocr/datasets/utils/backend.py

Co-authored-by: Xinyu Wang <45810070+xinke-wang@users.noreply.github.com>

* Update docs/en/tutorials/dataset_types.md

Co-authored-by: Xinyu Wang <45810070+xinke-wang@users.noreply.github.com>

* fix lint

Co-authored-by: Xinyu Wang <45810070+xinke-wang@users.noreply.github.com>
2022-04-29 15:46:36 +08:00

55 lines
1.3 KiB
Python
Executable File

# Dataset config for the OCR toy dataset stored under ``tests/data``.
# It declares two training entries (one backed by a txt annotation file, one
# by an lmdb annotation file) and a single lmdb-backed test entry.
dataset_type = 'OCRDataset'

# All paths are derived from the toy dataset root.
root = 'tests/data/ocr_toy_dataset'
img_prefix = f'{root}/imgs'

# --- Training entry 1: annotations read from a plain-text file -------------
train_anno_file1 = f'{root}/label.txt'
train1 = dict(
    type=dataset_type,
    img_prefix=img_prefix,
    ann_file=train_anno_file1,
    loader=dict(
        type='AnnFileLoader',
        # NOTE(review): presumably repeats the annotation list 100 times so
        # the tiny toy set yields more samples -- confirm against the loader.
        repeat=100,
        file_format='txt',
        file_storage_backend='disk',
        # Presumably each line is split on a single space and fields 0 and 1
        # are mapped to 'filename' and 'text' -- confirm against the parser.
        parser=dict(
            type='LineStrParser',
            keys=['filename', 'text'],
            keys_idx=[0, 1],
            separator=' ',
        ),
    ),
    pipeline=None,
    test_mode=False,
)

# --- Training entry 2: annotations read from an lmdb file ------------------
train_anno_file2 = f'{root}/label.lmdb'
train2 = dict(
    type=dataset_type,
    img_prefix=img_prefix,
    ann_file=train_anno_file2,
    loader=dict(
        type='AnnFileLoader',
        repeat=100,
        file_format='lmdb',
        file_storage_backend='disk',
        parser=dict(
            type='LineJsonParser',
            keys=['filename', 'text'],
        ),
    ),
    pipeline=None,
    test_mode=False,
)

# --- Test entry: same lmdb annotation file, no repetition, test mode on ----
test_anno_file1 = f'{root}/label.lmdb'
test = dict(
    type=dataset_type,
    img_prefix=img_prefix,
    ann_file=test_anno_file1,
    loader=dict(
        type='AnnFileLoader',
        repeat=1,
        file_format='lmdb',
        file_storage_backend='disk',
        parser=dict(
            type='LineJsonParser',
            keys=['filename', 'text'],
        ),
    ),
    pipeline=None,
    test_mode=True,
)

# Lists of dataset entries exposed by this config.
train_list = [train1, train2]
test_list = [test]