From 4c1790b3c6f86bc213f119645d53da5f09459c69 Mon Sep 17 00:00:00 2001 From: leezeeyee <47478961+easilylazy@users.noreply.github.com> Date: Sat, 16 Jul 2022 21:32:15 +0800 Subject: [PATCH] [Fix] fix typo of --lmdb-map-size default value (#1147) * fix typo of --lmdb-map-size default value * fix Co-authored-by: gaotongxiao <gaotongxiao@gmail.com> --- docs/en/tools.md | 20 ++++++++++---------- mmocr/utils/lmdb_util.py | 2 +- tools/data/utils/lmdb_converter.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/en/tools.md b/docs/en/tools.md index 39451971..fe594670 100644 --- a/docs/en/tools.md +++ b/docs/en/tools.md @@ -24,16 +24,16 @@ The final output filename will be `psenet_r50_fpnf_sbn_1x_20190801-{hash id}.pth Reading images or labels from files can be slow when data are excessive, e.g. on a scale of millions. Besides, in academia, most of the scene text recognition datasets are stored in lmdb format, including images and labels. To get closer to the mainstream practice and enhance the data storage efficiency, MMOCR now provides `tools/data/utils/lmdb_converter.py` to convert text recognition datasets to lmdb format. -| Arguments | Type | Description | -| ----------------- | ---- | ------------------------------------------------------------------ | -| `label_path` | str | Path to label file. | -| `output` | str | Output lmdb path. | -| `--img-root` | str | Input imglist path. | -| `--label-only` | bool | Only converter label to lmdb | -| `--label-format` | str | The format of the label file, either txt or jsonl. | -| `--batch-size` | int | Processing batch size, defaults to 1000 | -| `--encoding` | str | Bytes coding scheme, defaults to utf8. | -| `--lmdb-map-size` | int | Maximum size database may grow to , defaults to 109951162776 bytes | +| Arguments | Type | Description | +| ----------------- | ---- | ------------------------------------------------------------------------- | +| `label_path` | str | Path to label file. 
| +| `output` | str | Output lmdb path. | +| `--img-root` | str | Input imglist path. | +| `--label-only` | bool | Only convert labels to lmdb | +| `--label-format` | str | The format of the label file, either txt or jsonl. | +| `--batch-size` | int | Processing batch size, defaults to 1000 | +| `--encoding` | str | Bytes coding scheme, defaults to utf8. | +| `--lmdb-map-size` | int | Maximum size the database may grow to, defaults to 1099511627776 bytes (1TB) | ### Examples diff --git a/mmocr/utils/lmdb_util.py b/mmocr/utils/lmdb_util.py index 7b2d4009..0843e63a 100644 --- a/mmocr/utils/lmdb_util.py +++ b/mmocr/utils/lmdb_util.py @@ -44,7 +44,7 @@ def recog2lmdb(img_root, label_only=False, batch_size=1000, encoding='utf-8', - lmdb_map_size=109951162776, + lmdb_map_size=1099511627776, verify=True): """Create text recognition dataset to LMDB format. diff --git a/tools/data/utils/lmdb_converter.py b/tools/data/utils/lmdb_converter.py index 19c35b2f..e9f71ea7 100644 --- a/tools/data/utils/lmdb_converter.py +++ b/tools/data/utils/lmdb_converter.py @@ -36,9 +36,9 @@ def main(): '--lmdb-map-size', '-m', type=int, - default=109951162776, + default=1099511627776, help='Maximum size database may grow to, ' - 'defaults to 109951162776 bytes') + 'defaults to 1099511627776 bytes (1TB)') opt = parser.parse_args() assert opt.img_root or opt.label_only