PaddleOCR/ppocr/data/imaug/ct_process.py

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import paddle
import random
import pyclipper
import numpy as np
from PIL import Image
import paddle.vision.transforms as transforms
from ppocr.utils.utility import check_install
class RandomScale:
def __init__(self, short_size=640, **kwargs):
self.short_size = short_size
def scale_aligned(self, img, scale):
oh, ow = img.shape[0:2]
h = int(oh * scale + 0.5)
w = int(ow * scale + 0.5)
if h % 32 != 0:
h = h + (32 - h % 32)
if w % 32 != 0:
w = w + (32 - w % 32)
img = cv2.resize(img, dsize=(w, h))
factor_h = h / oh
factor_w = w / ow
return img, factor_h, factor_w
def __call__(self, data):
img = data["image"]
h, w = img.shape[0:2]
random_scale = np.array([0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3])
scale = (np.random.choice(random_scale) * self.short_size) / min(h, w)
img, factor_h, factor_w = self.scale_aligned(img, scale)
data["scale_factor"] = (factor_w, factor_h)
data["image"] = img
return data
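# Illustrative usage (a minimal sketch; the input array below is hypothetical):
# RandomScale draws a factor from {0.7 ... 1.3}, scales the short side towards
# `short_size`, then rounds both sides up to the next multiple of 32.
#
#   rs = RandomScale(short_size=640)
#   sample = {"image": np.zeros((480, 720, 3), dtype=np.uint8)}
#   sample = rs(sample)
#   h, w = sample["image"].shape[:2]
#   assert h % 32 == 0 and w % 32 == 0
#   # sample["scale_factor"] holds (factor_w, factor_h) for rescaling the polygons later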
class MakeShrink:
def __init__(self, kernel_scale=0.7, **kwargs):
self.kernel_scale = kernel_scale
def dist(self, a, b):
return np.linalg.norm((a - b), ord=2, axis=0)
def perimeter(self, bbox):
peri = 0.0
for i in range(bbox.shape[0]):
peri += self.dist(bbox[i], bbox[(i + 1) % bbox.shape[0]])
return peri
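    # Note on shrink(): `rate` is squared below, so the clip distance follows the
    # common polygon-shrink heuristic d = Area * (1 - r^2) / Perimeter (capped at
    # max_shr); pco.Execute(-offset) then moves every edge inward by roughly d pixels.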
def shrink(self, bboxes, rate, max_shr=20):
check_install("Polygon", "Polygon3")
import Polygon as plg
rate = rate * rate
shrinked_bboxes = []
for bbox in bboxes:
area = plg.Polygon(bbox).area()
peri = self.perimeter(bbox)
try:
pco = pyclipper.PyclipperOffset()
pco.AddPath(bbox, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
offset = min(int(area * (1 - rate) / (peri + 0.001) + 0.5), max_shr)
shrinked_bbox = pco.Execute(-offset)
if len(shrinked_bbox) == 0:
shrinked_bboxes.append(bbox)
continue
shrinked_bbox = np.array(shrinked_bbox[0])
if shrinked_bbox.shape[0] <= 2:
shrinked_bboxes.append(bbox)
continue
shrinked_bboxes.append(shrinked_bbox)
except Exception as e:
shrinked_bboxes.append(bbox)
return shrinked_bboxes
def __call__(self, data):
img = data["image"]
bboxes = data["polys"]
words = data["texts"]
scale_factor = data["scale_factor"]
gt_instance = np.zeros(img.shape[0:2], dtype="uint8") # h,w
training_mask = np.ones(img.shape[0:2], dtype="uint8")
training_mask_distance = np.ones(img.shape[0:2], dtype="uint8")
for i in range(len(bboxes)):
bboxes[i] = np.reshape(
bboxes[i]
* ([scale_factor[0], scale_factor[1]] * (bboxes[i].shape[0] // 2)),
(bboxes[i].shape[0] // 2, 2),
).astype("int32")
for i in range(len(bboxes)):
# different value for different bbox
cv2.drawContours(gt_instance, [bboxes[i]], -1, i + 1, -1)
# set training mask to 0
cv2.drawContours(training_mask, [bboxes[i]], -1, 0, -1)
            # for inaccurate ("don't care") annotations, also zero out training_mask_distance
if words[i] == "###" or words[i] == "???":
cv2.drawContours(training_mask_distance, [bboxes[i]], -1, 0, -1)
# make shrink
gt_kernel_instance = np.zeros(img.shape[0:2], dtype="uint8")
kernel_bboxes = self.shrink(bboxes, self.kernel_scale)
for i in range(len(bboxes)):
cv2.drawContours(gt_kernel_instance, [kernel_bboxes[i]], -1, i + 1, -1)
            # in the training mask: kernel and background = 1, box region = 0
if words[i] != "###" and words[i] != "???":
cv2.drawContours(training_mask, [kernel_bboxes[i]], -1, 1, -1)
gt_kernel = gt_kernel_instance.copy()
# for gt_kernel, kernel = 1
gt_kernel[gt_kernel > 0] = 1
        # erode the kernel instance map twice (3x3 kernel)
tmp1 = gt_kernel_instance.copy()
erode_kernel = np.ones((3, 3), np.uint8)
tmp1 = cv2.erode(tmp1, erode_kernel, iterations=1)
tmp2 = tmp1.copy()
tmp2 = cv2.erode(tmp2, erode_kernel, iterations=1)
        # the ring between the two erosions becomes the kernel reference (gt_kernel_inner)
gt_kernel_inner = tmp1 - tmp2
        # gt_instance: text instance map, bg = 0, each word gets its own value
        # training_mask: text instance mask, word region = 0, kernel and bg = 1
        # gt_kernel_instance: text kernel instance map, bg = 0, each word gets its own value
        # gt_kernel: text kernel map, bg = 0, every word uses the same value (1)
        # gt_kernel_inner: text kernel reference (ring between the two erosions)
        # training_mask_distance: words without annotation = 0, else 1
data["image"] = [
img,
gt_instance,
training_mask,
gt_kernel_instance,
gt_kernel,
gt_kernel_inner,
training_mask_distance,
]
return data
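# Illustrative input (a minimal sketch; the polygon and text below are hypothetical,
# and the flat x1,y1,...,xn,yn polygon layout is inferred from the reshape above):
# MakeShrink expects the keys produced upstream ("image", "polys", "texts",
# "scale_factor") and replaces data["image"] with the seven maps listed above.
#
#   shrinker = MakeShrink(kernel_scale=0.7)
#   sample = {
#       "image": np.zeros((640, 640, 3), dtype=np.uint8),
#       "polys": [np.array([10, 10, 100, 10, 100, 40, 10, 40], dtype=np.float32)],
#       "texts": ["word"],
#       "scale_factor": (1.0, 1.0),
#   }
#   sample = shrinker(sample)   # len(sample["image"]) == 7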
class GroupRandomHorizontalFlip:
def __init__(self, p=0.5, **kwargs):
self.p = p
def __call__(self, data):
imgs = data["image"]
if random.random() < self.p:
for i in range(len(imgs)):
imgs[i] = np.flip(imgs[i], axis=1).copy()
data["image"] = imgs
return data
class GroupRandomRotate:
def __init__(self, **kwargs):
pass
def __call__(self, data):
imgs = data["image"]
max_angle = 10
angle = random.random() * 2 * max_angle - max_angle
for i in range(len(imgs)):
img = imgs[i]
w, h = img.shape[:2]
rotation_matrix = cv2.getRotationMatrix2D((h / 2, w / 2), angle, 1)
img_rotation = cv2.warpAffine(
img, rotation_matrix, (h, w), flags=cv2.INTER_NEAREST
)
imgs[i] = img_rotation
data["image"] = imgs
return data
class GroupRandomCropPadding:
def __init__(self, target_size=(640, 640), **kwargs):
self.target_size = target_size
def __call__(self, data):
imgs = data["image"]
h, w = imgs[0].shape[0:2]
t_w, t_h = self.target_size
p_w, p_h = self.target_size
if w == t_w and h == t_h:
return data
t_h = t_h if t_h < h else h
t_w = t_w if t_w < w else w
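        # With probability 5/8, and only if the instance map imgs[1] contains any
        # text, bias the crop window so it overlaps a text region; otherwise fall
        # back to the uniformly random crop in the else branch.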
if random.random() > 3.0 / 8.0 and np.max(imgs[1]) > 0:
# make sure to crop the text region
tl = np.min(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
tl[tl < 0] = 0
br = np.max(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
br[br < 0] = 0
br[0] = min(br[0], h - t_h)
br[1] = min(br[1], w - t_w)
i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
else:
i = random.randint(0, h - t_h) if h - t_h > 0 else 0
j = random.randint(0, w - t_w) if w - t_w > 0 else 0
n_imgs = []
for idx in range(len(imgs)):
if len(imgs[idx].shape) == 3:
s3_length = int(imgs[idx].shape[-1])
img = imgs[idx][i : i + t_h, j : j + t_w, :]
img_p = cv2.copyMakeBorder(
img,
0,
p_h - t_h,
0,
p_w - t_w,
borderType=cv2.BORDER_CONSTANT,
value=tuple(0 for i in range(s3_length)),
)
else:
img = imgs[idx][i : i + t_h, j : j + t_w]
img_p = cv2.copyMakeBorder(
img,
0,
p_h - t_h,
0,
p_w - t_w,
borderType=cv2.BORDER_CONSTANT,
value=(0,),
)
n_imgs.append(img_p)
data["image"] = n_imgs
return data
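# Note: all maps in data["image"] are cropped with the same (i, j) window and then
# zero-padded (bottom/right) back to target_size, so the image and its label maps
# stay pixel-aligned after this transform.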
class MakeCentripetalShift:
def __init__(self, **kwargs):
pass
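    # Despite its name, jaccard() is a nearest-neighbour lookup: for each point in
    # As it returns the index of the closest point in Bs under Euclidean distance.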
def jaccard(self, As, Bs):
A = As.shape[0] # small
B = Bs.shape[0] # large
dis = np.sqrt(
np.sum(
(
As[:, np.newaxis, :].repeat(B, axis=1)
- Bs[np.newaxis, :, :].repeat(A, axis=0)
)
** 2,
axis=-1,
)
)
ind = np.argmin(dis, axis=-1)
return ind
def __call__(self, data):
imgs = data["image"]
(
img,
gt_instance,
training_mask,
gt_kernel_instance,
gt_kernel,
gt_kernel_inner,
training_mask_distance,
) = (imgs[0], imgs[1], imgs[2], imgs[3], imgs[4], imgs[5], imgs[6])
max_instance = np.max(gt_instance) # num bbox
# make centripetal shift
gt_distance = np.zeros((2, *img.shape[0:2]), dtype=np.float32)
for i in range(1, max_instance + 1):
# kernel_reference
ind = gt_kernel_inner == i
if np.sum(ind) == 0:
training_mask[gt_instance == i] = 0
training_mask_distance[gt_instance == i] = 0
continue
kpoints = (
np.array(np.where(ind)).transpose((1, 0))[:, ::-1].astype("float32")
)
ind = (gt_instance == i) * (gt_kernel_instance == 0)
if np.sum(ind) == 0:
continue
pixels = np.where(ind)
points = np.array(pixels).transpose((1, 0))[:, ::-1].astype("float32")
bbox_ind = self.jaccard(points, kpoints)
offset_gt = kpoints[bbox_ind] - points
gt_distance[:, pixels[0], pixels[1]] = offset_gt.T * 0.1
img = Image.fromarray(img)
img = img.convert("RGB")
data["image"] = img
data["gt_kernel"] = gt_kernel.astype("int64")
data["training_mask"] = training_mask.astype("int64")
data["gt_instance"] = gt_instance.astype("int64")
data["gt_kernel_instance"] = gt_kernel_instance.astype("int64")
data["training_mask_distance"] = training_mask_distance.astype("int64")
data["gt_distance"] = gt_distance.astype("float32")
return data
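# Summary of the targets built above: gt_distance stores, for every text pixel that
# lies outside its kernel, the (x, y) offset (scaled by 0.1) towards the nearest
# kernel-reference pixel of the same instance; instances with an empty kernel
# reference are masked out via training_mask / training_mask_distance.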
class ScaleAlignedShort:
def __init__(self, short_size=640, **kwargs):
self.short_size = short_size
def __call__(self, data):
img = data["image"]
org_img_shape = img.shape
h, w = img.shape[0:2]
scale = self.short_size * 1.0 / min(h, w)
h = int(h * scale + 0.5)
w = int(w * scale + 0.5)
if h % 32 != 0:
h = h + (32 - h % 32)
if w % 32 != 0:
w = w + (32 - w % 32)
img = cv2.resize(img, dsize=(w, h))
new_img_shape = img.shape
img_shape = np.array(org_img_shape + new_img_shape)
data["shape"] = img_shape
data["image"] = img
return data
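# Illustrative usage at inference time (a minimal sketch; the array is hypothetical):
# ScaleAlignedShort resizes so the short side is close to `short_size`, rounds both
# sides up to a multiple of 32, and records the original plus resized shapes.
#
#   op = ScaleAlignedShort(short_size=640)
#   sample = {"image": np.zeros((480, 854, 3), dtype=np.uint8)}
#   sample = op(sample)
#   # sample["shape"] == array([480, 854, 3, 640, 1152, 3])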