PaddleOCR/ppocr/utils/utility.py

206 lines
6.7 KiB
Python
Raw Permalink Normal View History

2020-05-10 16:26:57 +08:00
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
2020-05-12 21:12:52 +08:00
import os
2020-07-28 11:18:48 +08:00
import cv2
2022-01-07 12:56:45 +08:00
import random
import numpy as np
import paddle
import importlib.util
import sys
import subprocess
2020-05-10 16:26:57 +08:00
2020-10-13 17:13:33 +08:00
def print_dict(d, logger, delimiter=0):
    """
    Log the contents of a (possibly nested) dict, one entry per line.

    Entries are visited in sorted key order. A value that is a dict, or a
    non-empty list whose first element is a dict, is logged as a
    ``key : `` header followed by its children indented four more spaces.
    Any other value is logged inline as ``key : value``.

    Args:
        d: Dict to print.
        logger: Object exposing an ``info(msg)`` method.
        delimiter: Number of leading spaces for the current nesting level.
    """
    indent = " " * delimiter
    for key, value in sorted(d.items()):
        if isinstance(value, dict):
            children = [value]
        elif isinstance(value, list) and len(value) >= 1 and isinstance(value[0], dict):
            children = value
        else:
            # Leaf value: emit it inline and move on to the next key.
            logger.info("{}{} : {}".format(indent, key, value))
            continue

        logger.info("{}{} : ".format(indent, str(key)))
        for child in children:
            print_dict(child, logger, delimiter + 4)
2020-05-10 16:26:57 +08:00
def get_check_global_params(mode):
    """
    Return the names of the global config parameters to check for ``mode``.

    Args:
        mode: ``"train_eval"`` adds both the train and test batch-size
            parameters, ``"test"`` adds only the test batch-size parameter.
            Any other value yields just the common parameters.

    Returns:
        A new list of parameter names, each listed once.
    """
    # "image_shape" used to be listed twice here; each name is needed only once.
    check_params = [
        "use_gpu",
        "max_text_length",
        "image_shape",
        "character_type",
        "loss_type",
    ]
    if mode == "train_eval":
        check_params += [
            "train_batch_size_per_card",
            "test_batch_size_per_card",
        ]
    elif mode == "test":
        check_params.append("test_batch_size_per_card")
    return check_params
2022-04-29 13:39:07 +08:00
def _check_image_file(path):
img_end = {"jpg", "bmp", "png", "jpeg", "rgb", "tif", "tiff", "gif", "pdf"}
2022-04-29 13:39:07 +08:00
return any([path.lower().endswith(e) for e in img_end])
def get_image_file_list(img_file, infer_list=None):
    """
    Collect the image files designated by ``img_file``, sorted by path.

    ``img_file`` may be a single file, which is kept only if
    ``_check_image_file`` accepts it, or a directory, whose direct entries
    are filtered the same way (sub-directories are not descended into).

    Args:
        img_file: Path to an image file or to a directory of images.
        infer_list: Accepted for interface compatibility; not used here.

    Returns:
        Sorted list of matching file paths.

    Raises:
        Exception: If ``img_file`` is None, does not exist, or yields no
            matching file.
    """
    not_found_msg = "not found any img file in {}".format(img_file)
    if img_file is None or not os.path.exists(img_file):
        raise Exception(not_found_msg)

    if os.path.isfile(img_file) and _check_image_file(img_file):
        candidates = [img_file]
    elif os.path.isdir(img_file):
        entries = (os.path.join(img_file, name) for name in os.listdir(img_file))
        candidates = [
            entry
            for entry in entries
            if os.path.isfile(entry) and _check_image_file(entry)
        ]
    else:
        candidates = []

    if not candidates:
        raise Exception(not_found_msg)
    return sorted(candidates)
def binarize_img(img):
    """
    Binarize a 3-channel image using Otsu thresholding.

    The image is converted from BGR to grayscale, thresholded to 0/255 with
    an automatically chosen (Otsu) threshold, and converted back to three
    channels. Inputs that are not 3-dimensional with exactly 3 channels are
    returned unchanged.

    Args:
        img: Array-like image exposing ``shape``.

    Returns:
        The binarized 3-channel image, or ``img`` itself when it is skipped.
    """
    is_three_channel = len(img.shape) == 3 and img.shape[2] == 3
    if not is_three_channel:
        return img

    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(grayscale, 0, 255, otsu_mode)
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
def alpha_to_color(img, alpha_color=(255, 255, 255)):
    """
    Flatten a 4-channel (B, G, R, A) image onto a solid background colour.

    Each colour channel is blended with the matching component of
    ``alpha_color`` using the alpha channel scaled to [0, 1] as the weight,
    then cast to ``uint8``. Inputs that are not 3-dimensional with exactly
    4 channels are returned unchanged.

    Args:
        img: Array-like image exposing ``shape``.
        alpha_color: Background colour, indexed as (R, G, B).

    Returns:
        A 3-channel B, G, R image, or ``img`` itself when it is skipped.
    """
    has_alpha = len(img.shape) == 3 and img.shape[2] == 4
    if not has_alpha:
        return img

    blue, green, red, alpha_channel = cv2.split(img)
    alpha = alpha_channel / 255
    red = (alpha_color[0] * (1 - alpha) + red * alpha).astype(np.uint8)
    green = (alpha_color[1] * (1 - alpha) + green * alpha).astype(np.uint8)
    blue = (alpha_color[2] * (1 - alpha) + blue * alpha).astype(np.uint8)
    return cv2.merge((blue, green, red))
2020-05-12 21:12:52 +08:00
def check_and_read(img_path):
    """
    Read a GIF or PDF file; other file types are left to the caller.

    The file type is decided from the last three characters of the file
    name, compared case-insensitively.

    Args:
        img_path: Path of the file to inspect.

    Returns:
        A 3-tuple ``(data, is_gif, is_pdf)``:

        * GIF: ``(frame, True, False)`` where ``frame`` is the first frame
          with its channel axis reversed, or ``(None, False, False)`` when
          the frame cannot be read.
        * PDF: ``(pages, False, True)`` where ``pages`` is a list with one
          BGR image per page.
        * Anything else: ``(None, False, False)``.
    """
    suffix = os.path.basename(img_path)[-3:].lower()

    if suffix == "gif":
        gif = cv2.VideoCapture(img_path)
        try:
            ret, frame = gif.read()
        finally:
            # Always free the capture handle, even if decoding raised.
            gif.release()
        if not ret:
            logger = logging.getLogger("ppocr")
            logger.info(
                "Cannot read {}. This gif image maybe corrupted.".format(img_path)
            )
            # Keep the same 3-tuple shape as every other return path so that
            # callers unpacking three values do not fail on a bad file.
            return None, False, False
        if len(frame.shape) == 2 or frame.shape[-1] == 1:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        imgvalue = frame[:, :, ::-1]
        return imgvalue, True, False

    if suffix == "pdf":
        from paddle.utils import try_import

        fitz = try_import("fitz")
        from PIL import Image

        imgs = []
        with fitz.open(img_path) as pdf:
            for pg in range(0, pdf.page_count):
                page = pdf[pg]
                # Render at 2x zoom first.
                mat = fitz.Matrix(2, 2)
                pm = page.get_pixmap(matrix=mat, alpha=False)

                # if width or height > 2000 pixels, don't enlarge the image
                if pm.width > 2000 or pm.height > 2000:
                    pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)

                img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
                img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                imgs.append(img)
        return imgs, False, True

    return None, False, False
def load_vqa_bio_label_maps(label_map_path):
    """
    Build BIO label <-> id lookup tables from a class-list file.

    The file is read as UTF-8 with one class name per line (surrounding
    whitespace stripped). Lines equal to OTHER, OTHERS or IGNORE, compared
    case-insensitively, are dropped because the outside tag ``O`` is always
    label 0. Every remaining class contributes ``B-<class>`` followed by
    ``I-<class>``. All labels are upper-cased in both maps.

    Args:
        label_map_path: Path of the class-list text file.

    Returns:
        Tuple ``(label2id_map, id2label_map)``.
    """
    with open(label_map_path, "r", encoding="utf-8") as fin:
        class_names = [raw.strip() for raw in fin.readlines()]

    skipped = ["OTHER", "OTHERS", "IGNORE"]
    labels = ["O"]
    for class_name in class_names:
        if class_name.upper() in skipped:
            continue
        labels.extend(("B-" + class_name, "I-" + class_name))

    label2id_map = {}
    id2label_map = {}
    for idx, label in enumerate(labels):
        upper_label = label.upper()
        label2id_map[upper_label] = idx
        id2label_map[idx] = upper_label
    return label2id_map, id2label_map
2022-01-07 12:56:45 +08:00
def set_seed(seed=1024):
    """
    Seed the Python, NumPy and Paddle random number generators.

    Args:
        seed: Value passed to ``random.seed``, ``np.random.seed`` and
            ``paddle.seed``, in that order.
    """
    for seeder in (random.seed, np.random.seed, paddle.seed):
        seeder(seed)
def check_install(module_name, install_name):
    """
    Make sure ``module_name`` is importable, installing it with pip if not.

    Args:
        module_name: Import name to look up (may be dotted).
        install_name: Distribution name passed to ``pip install``.

    Raises:
        Exception: If the pip installation fails. The underlying
            ``subprocess.CalledProcessError`` is chained as the cause.
    """
    try:
        spec = importlib.util.find_spec(module_name)
    except ModuleNotFoundError:
        # For a dotted name find_spec imports the parent package first; a
        # missing parent simply means the module is not installed.
        spec = None

    if spec is not None:
        print(f"{module_name} has been installed.")
        return

    print(f"Warnning! The {module_name} module is NOT installed")
    print(
        f"Try install {module_name} module automatically. You can also try to install manually by pip install {install_name}."
    )
    # Use the running interpreter so the package lands in its environment.
    python = sys.executable
    try:
        subprocess.check_call(
            [python, "-m", "pip", "install", install_name],
            stdout=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as exc:
        raise Exception(
            f"Install {module_name} failed, please install manually"
        ) from exc
    # Let the import system notice the freshly installed package so it can be
    # imported in this same process.
    importlib.invalidate_caches()
    print(f"The {module_name} module is now installed")
class AverageMeter:
    """Track the latest value, weighted sum, count and mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset every statistic to zero."""
        self.val = 0  # most recent value passed to update()
        self.avg = 0  # running mean: sum / count
        self.sum = 0  # accumulated val * n
        self.count = 0  # accumulated n

    def update(self, val, n=1):
        """
        Record a new observation.

        Args:
            val: Observed value.
            n: Weight of the observation (for example a batch size).
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # With a zero total weight there is no mean to compute; report 0
        # instead of raising ZeroDivisionError.
        self.avg = self.sum / self.count if self.count != 0 else 0