# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Modified from https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/ppocr/postprocess/rec_postprocess.py
import os.path as osp
import re
import string

import numpy as np
import torch

try:
    import requests
except ImportError:  # only needed when the dictionary is given as a URL
    requests = None


class BaseRecLabelDecode(object):
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None, use_space_char=False):
        """Build the index <-> character tables.

        Args:
            character_dict_path: ``None`` to use the built-in digits +
                lowercase alphabet, a local file path, or a string starting
                with ``'http'`` that is downloaded into the current working
                directory first. The file holds one character per line
                (UTF-8).
            use_space_char: append ``' '`` to the characters read from the
                dictionary file. Ignored when ``character_dict_path`` is
                ``None``.
        """
        self.beg_str = 'sos'
        self.end_str = 'eos'

        self.character_str = []
        if character_dict_path is None:
            self.character_str = '0123456789abcdefghijklmnopqrstuvwxyz'
            dict_character = list(self.character_str)
        else:
            if character_dict_path.startswith('http'):
                character_dict_path = self._download_dict(character_dict_path)
            with open(character_dict_path, 'rb') as fin:
                for line in fin:
                    # Drop the line terminator only; other whitespace may be
                    # a legitimate dictionary entry.
                    self.character_str.append(
                        line.decode('utf-8').strip('\r\n'))
            if use_space_char:
                self.character_str.append(' ')
            dict_character = list(self.character_str)

        # Subclasses prepend/append their special tokens here.
        dict_character = self.add_special_char(dict_character)
        self.dict = {char: i for i, char in enumerate(dict_character)}
        self.character = dict_character

    @staticmethod
    def _download_dict(url):
        """Fetch the dictionary at ``url`` into the working directory.

        The local file name is the last path component of ``url``. An
        already existing file of that name is reused without touching the
        network.

        Returns:
            The local file path.

        Raises:
            ImportError: ``requests`` is not installed.
            requests.HTTPError: the server answered with an error status.
            OSError: the local file could not be written.
        """
        local_path = url.split('/')[-1]
        if osp.exists(local_path):
            return local_path
        if requests is None:
            raise ImportError(
                'The `requests` package is required to download a '
                'character dictionary from a URL.')
        response = requests.get(url)
        # Never persist an HTTP error page as if it were the dictionary.
        response.raise_for_status()
        with open(local_path, 'wb') as out:
            out.write(response.content)
        return local_path

    def add_special_char(self, dict_character):
        """Hook for subclasses to add special tokens; identity here."""
        return dict_character

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """ convert text-index into text-label.

        Args:
            text_index: batch of index sequences (one sequence per sample).
            text_prob: optional per-position confidences, indexed like
                ``text_index``.
            is_remove_duplicate: drop a position when it repeats the index
                of the position directly before it.

        Returns:
            One ``(text, mean_confidence)`` tuple per sample.
        """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()
        for batch_idx in range(len(text_index)):
            # asarray lets plain Python lists take the vectorised path too;
            # it is a no-op for ndarrays.
            row = np.asarray(text_index[batch_idx])
            selection = np.ones(len(row), dtype=bool)
            if is_remove_duplicate:
                selection[1:] = row[1:] != row[:-1]
            for ignored_token in ignored_tokens:
                selection &= row != ignored_token

            char_list = [self.character[text_id] for text_id in row[selection]]
            if text_prob is not None:
                conf_list = np.asarray(text_prob[batch_idx])[selection]
            else:
                # Without probabilities every position counts as 1, so the
                # mean is 1.0 for any non-empty sequence.
                conf_list = [1] * len(selection)
            if len(conf_list) == 0:
                conf_list = [0]

            text = ''.join(char_list)
            result_list.append((text, np.mean(conf_list).tolist()))
        return result_list

    def get_ignored_tokens(self):
        """Indices that never produce a character."""
        return [0]  # for ctc blank


class CTCLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None, use_space_char=False,
                 **kwargs):
        super(CTCLabelDecode, self).__init__(character_dict_path,
                                             use_space_char)

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode network output (and optionally labels) into text.

        Args:
            preds: scores with the class dimension on axis 2, as a
                ``torch.Tensor`` or ndarray; if a tuple/list is given its
                last element is used.
            label: optional batch of ground-truth index sequences.

        Returns:
            The decoded predictions, or ``(predictions, labels)`` when
            ``label`` is given.
        """
        if isinstance(preds, (tuple, list)):
            preds = preds[-1]
        if isinstance(preds, torch.Tensor):
            # detach + cpu so tensors on an accelerator or still attached to
            # the autograd graph can be converted.
            preds = preds.detach().cpu().numpy()
        preds_idx = preds.argmax(axis=2)
        preds_prob = preds.max(axis=2)
        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
        if label is None:
            return text
        label = self.decode(label)
        return text, label

    def add_special_char(self, dict_character):
        """Put the CTC blank at index 0."""
        dict_character = ['blank'] + dict_character
        return dict_character


class SARLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

    # Everything matched by this pattern is removed when ``rm_symbol`` is
    # set; compiled once instead of once per decoded sample.
    _SYMBOL_PATTERN = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]')

    def __init__(self, character_dict_path=None, use_space_char=False,
                 **kwargs):
        """See ``BaseRecLabelDecode``; ``rm_symbol`` (keyword, default
        ``False``) lower-cases the decoded text and strips symbols."""
        super(SARLabelDecode, self).__init__(character_dict_path,
                                             use_space_char)

        self.rm_symbol = kwargs.get('rm_symbol', False)

    def add_special_char(self, dict_character):
        """Append unknown, begin/end and padding tokens, in that order.

        Records their positions in ``unknown_idx``, ``start_idx`` /
        ``end_idx`` (the same index) and ``padding_idx``.
        """
        # NOTE(review): all three token strings are empty here, so they
        # decode to nothing and share a single '' key in ``self.dict``.
        # Confirm this is intended and not placeholder text lost upstream.
        beg_end_str = ''
        unknown_str = ''
        padding_str = ''
        dict_character = dict_character + [unknown_str]
        self.unknown_idx = len(dict_character) - 1
        dict_character = dict_character + [beg_end_str]
        self.start_idx = len(dict_character) - 1
        self.end_idx = len(dict_character) - 1
        dict_character = dict_character + [padding_str]
        self.padding_idx = len(dict_character) - 1
        return dict_character

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """ convert text-index into text-label.

        Padding positions are skipped and decoding of a sample stops at the
        first end token. When ``text_prob`` is ``None`` an end token at
        position 0 is skipped instead of ending the sample.

        Returns:
            One ``(text, mean_confidence)`` tuple per sample; the confidence
            is 0 when no character was kept.
        """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()
        end_idx = int(self.end_idx)

        for batch_idx in range(len(text_index)):
            row = text_index[batch_idx]
            char_list = []
            conf_list = []
            for idx in range(len(row)):
                token = row[idx]
                if token in ignored_tokens:
                    continue
                if int(token) == end_idx:
                    if text_prob is None and idx == 0:
                        continue
                    break
                # only for predict
                if is_remove_duplicate and idx > 0 and row[idx - 1] == token:
                    continue
                char_list.append(self.character[int(token)])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)

            text = ''.join(char_list)
            if self.rm_symbol:
                text = self._SYMBOL_PATTERN.sub('', text.lower())
            if not conf_list:
                # Same fallback as the base class: avoids np.mean([]) -> nan.
                conf_list = [0]
            result_list.append((text, np.mean(conf_list).tolist()))
        return result_list

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode network output (and optionally labels) into text.

        Args:
            preds: scores with the class dimension on axis 2, as a
                ``torch.Tensor`` or ndarray.
            label: optional batch of ground-truth index sequences.

        Returns:
            The decoded predictions, or ``(predictions, labels)`` when
            ``label`` is given.
        """
        if isinstance(preds, torch.Tensor):
            preds = preds.detach().cpu().numpy()
        preds_idx = preds.argmax(axis=2)
        preds_prob = preds.max(axis=2)

        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)

        if label is None:
            return text
        label = self.decode(label, is_remove_duplicate=False)
        return text, label

    def get_ignored_tokens(self):
        """Only the padding index is ignored."""
        return [self.padding_idx]