From b863bbca5e99c8a90535fa9d86da32b907738fd4 Mon Sep 17 00:00:00 2001 From: Hongbin Sun Date: Tue, 25 May 2021 19:58:32 +0800 Subject: [PATCH] use mmcv instead (#237) * use mmcv instead * update --- mmocr/datasets/pipelines/dbnet_transforms.py | 4 ++-- mmocr/datasets/pipelines/ocr_transforms.py | 21 ++++++++++---------- mmocr/datasets/pipelines/transforms.py | 9 +++++---- mmocr/models/ner/convertors/ner_convertor.py | 9 +++++---- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/mmocr/datasets/pipelines/dbnet_transforms.py b/mmocr/datasets/pipelines/dbnet_transforms.py index 1624a282..3be9ed8e 100644 --- a/mmocr/datasets/pipelines/dbnet_transforms.py +++ b/mmocr/datasets/pipelines/dbnet_transforms.py @@ -1,6 +1,6 @@ -import cv2 import imgaug import imgaug.augmenters as iaa +import mmcv import numpy as np from mmdet.core.mask import PolygonMasks @@ -145,7 +145,7 @@ class EastRandomCrop: padded_img = np.zeros( (self.target_size[1], self.target_size[0], img.shape[2]), img.dtype) - padded_img[:h, :w] = cv2.resize( + padded_img[:h, :w] = mmcv.imresize( img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h)) # for bboxes diff --git a/mmocr/datasets/pipelines/ocr_transforms.py b/mmocr/datasets/pipelines/ocr_transforms.py index 02c90c37..263c71d6 100644 --- a/mmocr/datasets/pipelines/ocr_transforms.py +++ b/mmocr/datasets/pipelines/ocr_transforms.py @@ -1,6 +1,5 @@ import math -import cv2 import mmcv import numpy as np import torch @@ -91,8 +90,8 @@ class ResizeOCR: if dst_max_width is not None: valid_ratio = min(1.0, 1.0 * new_width / dst_max_width) resize_width = min(dst_max_width, new_width) - img_resize = cv2.resize(results['img'], - (resize_width, dst_height)) + img_resize = mmcv.imresize(results['img'], + (resize_width, dst_height)) resize_shape = img_resize.shape pad_shape = img_resize.shape if new_width < dst_max_width: @@ -102,13 +101,13 @@ class ResizeOCR: pad_val=self.img_pad_value) pad_shape = img_resize.shape else: - img_resize = cv2.resize(results['img'], - (new_width, dst_height)) + img_resize = mmcv.imresize(results['img'], + (new_width, dst_height)) resize_shape = img_resize.shape pad_shape = img_resize.shape else: - img_resize = cv2.resize(results['img'], - (dst_max_width, dst_height)) + img_resize = mmcv.imresize(results['img'], + (dst_max_width, dst_height)) resize_shape = img_resize.shape pad_shape = img_resize.shape @@ -286,10 +285,10 @@ class RandomPaddingOCR: random_padding_bottom = round( np.random.uniform(0, self.max_ratio[3]) * ori_height) - img = np.copy(results['img']) - img = cv2.copyMakeBorder(img, random_padding_top, - random_padding_bottom, random_padding_left, - random_padding_right, cv2.BORDER_REPLICATE) + padding = (random_padding_left, random_padding_top, + random_padding_right, random_padding_bottom) + img = mmcv.impad(results['img'], padding=padding, padding_mode='edge') + results['img'] = img results['img_shape'] = img.shape diff --git a/mmocr/datasets/pipelines/transforms.py b/mmocr/datasets/pipelines/transforms.py index 083cb415..59e61af0 100644 --- a/mmocr/datasets/pipelines/transforms.py +++ b/mmocr/datasets/pipelines/transforms.py @@ -1,6 +1,7 @@ import math import cv2 +import mmcv import numpy as np import Polygon as plg import torchvision.transforms as transforms @@ -587,7 +588,7 @@ class RandomRotatePolyInstances: (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8), np.random.randint(0, w * 7 // 8)) img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)] - img_cut = cv2.resize(img_cut, (canvas_size[1], canvas_size[0])) + img_cut = mmcv.imresize(img_cut, (canvas_size[1], canvas_size[0])) mask = cv2.warpAffine( mask, rotation_matrix, (canvas_size[1], canvas_size[0]), @@ -670,7 +671,7 @@ class SquareResizePad: t_w = self.target_size if h <= w else int(w * self.target_size / h) else: t_h = t_w = self.target_size - img = cv2.resize(img, (t_w, t_h)) + img = mmcv.imresize(img, (t_w, t_h)) return img, (t_h, t_w) def square_pad(self, img): @@ -685,7 +686,7 @@ class SquareResizePad: (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8), np.random.randint(0, w * 7 // 8)) img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)] - expand_img = cv2.resize(img_cut, (pad_size, pad_size)) + expand_img = mmcv.imresize(img_cut, (pad_size, pad_size)) if h > w: y0, x0 = 0, (h - w) // 2 else: @@ -758,7 +759,7 @@ class RandomScaling: scales = self.size * 1.0 / max(h, w) * aspect_ratio scales = np.array([scales, scales]) out_size = (int(h * scales[1]), int(w * scales[0])) - image = cv2.resize(image, out_size[::-1]) + image = mmcv.imresize(image, out_size[::-1]) results['img'] = image results['img_shape'] = image.shape diff --git a/mmocr/models/ner/convertors/ner_convertor.py b/mmocr/models/ner/convertors/ner_convertor.py index fa296f77..27db595d 100644 --- a/mmocr/models/ner/convertors/ner_convertor.py +++ b/mmocr/models/ner/convertors/ner_convertor.py @@ -1,6 +1,7 @@ import numpy as np from mmocr.models.builder import CONVERTORS +from mmocr.utils import list_from_file @CONVERTORS.register_module() @@ -36,10 +37,10 @@ class NerConvertor: assert self.max_len > 2 assert self.annotation_type in ['bio', 'bioes'] - lines = open(vocab_file, encoding='utf-8').readlines() - self.vocab_size = len(lines) - for i in range(len(lines)): - self.word2ids.update({lines[i].rstrip(): i}) + vocabs = list_from_file(vocab_file) + self.vocab_size = len(vocabs) + for idx, vocab in enumerate(vocabs): + self.word2ids.update({vocab: idx}) if self.annotation_type == 'bio': self.label2id_dict, self.id2label, self.ignore_id = \