Remove test dependency on tools

Signed-off-by: lizz <lizz@sensetime.com>
2025-06-03 21:54:47 +08:00 · 2021-04-05 21:00:41 +08:00 · 2021-04-05 21:00:41 +08:00 · 09ffd284ee
commit 09ffd284ee
parent cc1f103e1c
4 changed files with 52 additions and 49 deletions
--- a/mmocr/utils/__init__.py
+++ b/mmocr/utils/__init__.py
@@ -4,9 +4,10 @@ from mmdet.utils import get_root_logger
 from .check_argument import (equal_len, is_2dlist, is_3dlist, is_ndarray_list,
                             is_none_or_type, is_type_list, valid_boundary)
 from .collect_env import collect_env
+from .lmdb_util import lmdb_converter

 __all__ = [
    'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env',
    'is_3dlist', 'is_ndarray_list', 'is_type_list', 'is_none_or_type',
-    'equal_len', 'is_2dlist', 'valid_boundary'
+    'equal_len', 'is_2dlist', 'valid_boundary', 'lmdb_converter'
 ]
--- a/mmocr/utils/lmdb_util.py
+++ b/mmocr/utils/lmdb_util.py
@@ -0,0 +1,46 @@
+import shutil
+import sys
+import time
+from pathlib import Path
+
+import lmdb
+
+
+def lmdb_converter(imglist, output, batch_size=1000, coding='utf-8'):
+    # read imglist
+    with open(imglist) as f:
+        lines = f.readlines()
+
+    # create lmdb database
+    if Path(output).is_dir():
+        while True:
+            print('%s already exist, delete or not? [Y/n]' % output)
+            Yn = input().strip()
+            if Yn in ['Y', 'y']:
+                shutil.rmtree(output)
+                break
+            elif Yn in ['N', 'n']:
+                return
+    print('create database %s' % output)
+    Path(output).mkdir(parents=True, exist_ok=False)
+    env = lmdb.open(output, map_size=1099511627776)
+
+    # build lmdb
+    beg_time = time.strftime('%H:%M:%S')
+    for beg_index in range(0, len(lines), batch_size):
+        end_index = min(beg_index + batch_size, len(lines))
+        sys.stdout.write('\r[%s-%s], processing [%d-%d] / %d' %
+                         (beg_time, time.strftime('%H:%M:%S'), beg_index,
+                          end_index, len(lines)))
+        sys.stdout.flush()
+        batch = [(str(index).encode(coding), lines[index].encode(coding))
+                 for index in range(beg_index, end_index)]
+        with env.begin(write=True) as txn:
+            cursor = txn.cursor()
+            cursor.putmulti(batch, dupdata=False, overwrite=True)
+    sys.stdout.write('\n')
+    with env.begin(write=True) as txn:
+        key = 'total_number'.encode(coding)
+        value = str(len(lines)).encode(coding)
+        txn.put(key, value)
+    print('done', flush=True)
--- a/tests/test_dataset/test_loader.py
+++ b/tests/test_dataset/test_loader.py
@@ -3,9 +3,9 @@ import os.path as osp
 import tempfile

 import pytest
-from tools.data.utils.txt2lmdb import converter

 from mmocr.datasets.utils.loader import HardDiskLoader, LmdbLoader, Loader
+from mmocr.utils import lmdb_converter


 def _create_dummy_line_str_file(ann_file):
@@ -63,7 +63,7 @@ def test_loader():
    # test lmdb loader and line str parser
    _create_dummy_line_str_file(ann_file)
    lmdb_file = osp.join(tmp_dir.name, 'fake_data.lmdb')
-    converter(ann_file, lmdb_file)
+    lmdb_converter(ann_file, lmdb_file)

    lmdb_loader = LmdbLoader(lmdb_file, parser, repeat=1)
    assert lmdb_loader[0] == {'filename': 'sample1.jpg', 'text': 'hello'}
--- a/tools/data/utils/txt2lmdb.py
+++ b/tools/data/utils/txt2lmdb.py
@@ -1,50 +1,6 @@
 import argparse
-import shutil
-import sys
-import time
-from pathlib import Path

-import lmdb
-
-
-def converter(imglist, output, batch_size=1000, coding='utf-8'):
-    # read imglist
-    with open(imglist) as f:
-        lines = f.readlines()
-
-    # create lmdb database
-    if Path(output).is_dir():
-        while True:
-            print('%s already exist, delete or not? [Y/n]' % output)
-            Yn = input().strip()
-            if Yn in ['Y', 'y']:
-                shutil.rmtree(output)
-                break
-            elif Yn in ['N', 'n']:
-                return
-    print('create database %s' % output)
-    Path(output).mkdir(parents=True, exist_ok=False)
-    env = lmdb.open(output, map_size=1099511627776)
-
-    # build lmdb
-    beg_time = time.strftime('%H:%M:%S')
-    for beg_index in range(0, len(lines), batch_size):
-        end_index = min(beg_index + batch_size, len(lines))
-        sys.stdout.write('\r[%s-%s], processing [%d-%d] / %d' %
-                         (beg_time, time.strftime('%H:%M:%S'), beg_index,
-                          end_index, len(lines)))
-        sys.stdout.flush()
-        batch = [(str(index).encode(coding), lines[index].encode(coding))
-                 for index in range(beg_index, end_index)]
-        with env.begin(write=True) as txn:
-            cursor = txn.cursor()
-            cursor.putmulti(batch, dupdata=False, overwrite=True)
-    sys.stdout.write('\n')
-    with env.begin(write=True) as txn:
-        key = 'total_number'.encode(coding)
-        value = str(len(lines)).encode(coding)
-        txn.put(key, value)
-    print('done', flush=True)
+from mmocr.utils import lmdb_converter


 def main():
@@ -66,7 +22,7 @@ def main():
        help='bytes coding scheme, default utf8')
    opt = parser.parse_args()

-    converter(
+    lmdb_converter(
        opt.imglist, opt.output, batch_size=opt.batch_size, coding=opt.coding)