Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into dygraph
commit
4ee160ae84
|
@ -2531,7 +2531,7 @@ class MainWindow(QMainWindow):
|
|||
split = 'test'
|
||||
|
||||
# save dict
|
||||
html = {'structure': {'tokens': token_list}, 'cell': cells}
|
||||
html = {'structure': {'tokens': token_list}, 'cells': cells}
|
||||
json_results.append({'filename': os.path.basename(image_path), 'split': split, 'imgid': imgid, 'html': html})
|
||||
imgid += 1
|
||||
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
English | [简体中文](README_ch.md)
|
||||
|
||||
# PPOCRLabel
|
||||
# PPOCRLabelv2
|
||||
|
||||
PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PP-OCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box, table and multi-point annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.
|
||||
PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PP-OCR model to automatically detect and re-recognize data. It is written in Python3 and PyQT5, supporting rectangular box, table, irregular text and key information annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.
|
||||
|
||||
<img src="./data/gif/steps_en.gif" width="100%"/>
|
||||
| regular text annotation | table annotation |
|
||||
| :-------------------------------------------------: | :--------------------------------------------: |
|
||||
| <img src="./data/gif/steps_en.gif" width="80%"/> | <img src="./data/gif/table.gif" width="100%"/> |
|
||||
| **irregular text annotation** | **key information annotation** |
|
||||
| <img src="./data/gif/multi-point.gif" width="80%"/> | <img src="./data/gif/kie.gif" width="100%"/> |
|
||||
|
||||
### Recent Update
|
||||
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
[English](README.md) | 简体中文
|
||||
|
||||
# PPOCRLabel
|
||||
# PPOCRLabelv2
|
||||
|
||||
PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置PP-OCR模型对数据自动标注和重新识别。使用Python3和PyQT5编写,支持矩形框标注和四点标注模式,导出格式可直接用于PaddleOCR检测和识别模型的训练。
|
||||
|
||||
<img src="./data/gif/steps.gif" width="100%"/>
|
||||
| 常规标注 | 表格标注 |
|
||||
| :-------------------------------------------------: | :--------------------------------------------: |
|
||||
| <img src="./data/gif/steps_en.gif" width="80%"/> | <img src="./data/gif/table.gif" width="100%"/> |
|
||||
| **不规则文本标注** | **关键信息标注** |
|
||||
| <img src="./data/gif/multi-point.gif" width="80%"/> | <img src="./data/gif/kie.gif" width="100%"/> |
|
||||
|
||||
#### 近期更新
|
||||
- 2022.05:**新增表格标注**,使用方法见下方`2.2 表格标注`(by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
|
||||
|
|
|
@ -33,7 +33,7 @@ setup(
|
|||
package_dir={'PPOCRLabel': ''},
|
||||
include_package_data=True,
|
||||
entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]},
|
||||
version='1.0.2',
|
||||
version='2.1.1',
|
||||
install_requires=requirements,
|
||||
license='Apache License 2.0',
|
||||
description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models',
|
||||
|
|
|
@ -107,7 +107,7 @@ Train:
|
|||
Eval:
|
||||
dataset:
|
||||
name: PubTabDataSet
|
||||
data_dir: train_data/table/val/
|
||||
data_dir: train_data/table/val/
|
||||
label_file_list: [train_data/table/val.txt]
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
|
|
|
@ -268,11 +268,12 @@ cv::Mat Utility::crop_image(cv::Mat &img, std::vector<int> &area) {
|
|||
|
||||
void Utility::sorted_boxes(std::vector<OCRPredictResult> &ocr_result) {
|
||||
std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box);
|
||||
|
||||
for (int i = 0; i < ocr_result.size() - 1; i++) {
|
||||
if (abs(ocr_result[i + 1].box[0][1] - ocr_result[i].box[0][1]) < 10 &&
|
||||
(ocr_result[i + 1].box[0][0] < ocr_result[i].box[0][0])) {
|
||||
std::swap(ocr_result[i], ocr_result[i + 1]);
|
||||
if (ocr_result.size() > 0) {
|
||||
for (int i = 0; i < ocr_result.size() - 1; i++) {
|
||||
if (abs(ocr_result[i + 1].box[0][1] - ocr_result[i].box[0][1]) < 10 &&
|
||||
(ocr_result[i + 1].box[0][0] < ocr_result[i].box[0][0])) {
|
||||
std::swap(ocr_result[i], ocr_result[i + 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
67
paddleocr.py
67
paddleocr.py
|
@ -414,6 +414,33 @@ def get_model_config(type, version, model_type, lang):
|
|||
return model_urls[version][model_type][lang]
|
||||
|
||||
|
||||
def img_decode(content: bytes):
|
||||
np_arr = np.frombuffer(content, dtype=np.uint8)
|
||||
return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
|
||||
|
||||
|
||||
def check_img(img):
|
||||
if isinstance(img, bytes):
|
||||
img = img_decode(img)
|
||||
if isinstance(img, str):
|
||||
# download net image
|
||||
if is_link(img):
|
||||
download_with_progressbar(img, 'tmp.jpg')
|
||||
img = 'tmp.jpg'
|
||||
image_file = img
|
||||
img, flag, _ = check_and_read(image_file)
|
||||
if not flag:
|
||||
with open(image_file, 'rb') as f:
|
||||
img = img_decode(f.read())
|
||||
if img is None:
|
||||
logger.error("error in loading image:{}".format(image_file))
|
||||
return None
|
||||
if isinstance(img, np.ndarray) and len(img.shape) == 2:
|
||||
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
||||
|
||||
return img
|
||||
|
||||
|
||||
class PaddleOCR(predict_system.TextSystem):
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
|
@ -482,7 +509,7 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
rec: use text recognition or not. If false, only det will be exec. Default is True
|
||||
cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
|
||||
"""
|
||||
assert isinstance(img, (np.ndarray, list, str))
|
||||
assert isinstance(img, (np.ndarray, list, str, bytes))
|
||||
if isinstance(img, list) and det == True:
|
||||
logger.error('When input a list of images, det must be false')
|
||||
exit(0)
|
||||
|
@ -491,22 +518,8 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
|
||||
)
|
||||
|
||||
if isinstance(img, str):
|
||||
# download net image
|
||||
if img.startswith('http'):
|
||||
download_with_progressbar(img, 'tmp.jpg')
|
||||
img = 'tmp.jpg'
|
||||
image_file = img
|
||||
img, flag, _ = check_and_read(image_file)
|
||||
if not flag:
|
||||
with open(image_file, 'rb') as f:
|
||||
np_arr = np.frombuffer(f.read(), dtype=np.uint8)
|
||||
img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
|
||||
if img is None:
|
||||
logger.error("error in loading image:{}".format(image_file))
|
||||
return None
|
||||
if isinstance(img, np.ndarray) and len(img.shape) == 2:
|
||||
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
||||
img = check_img(img)
|
||||
|
||||
if det and rec:
|
||||
dt_boxes, rec_res, _ = self.__call__(img, cls)
|
||||
return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
|
||||
|
@ -585,23 +598,7 @@ class PPStructure(StructureSystem):
|
|||
super().__init__(params)
|
||||
|
||||
def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
|
||||
if isinstance(img, str):
|
||||
# download net image
|
||||
if img.startswith('http'):
|
||||
download_with_progressbar(img, 'tmp.jpg')
|
||||
img = 'tmp.jpg'
|
||||
image_file = img
|
||||
img, flag, _ = check_and_read(image_file)
|
||||
if not flag:
|
||||
with open(image_file, 'rb') as f:
|
||||
np_arr = np.frombuffer(f.read(), dtype=np.uint8)
|
||||
img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
|
||||
if img is None:
|
||||
logger.error("error in loading image:{}".format(image_file))
|
||||
return None
|
||||
if isinstance(img, np.ndarray) and len(img.shape) == 2:
|
||||
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
||||
|
||||
img = check_img(img)
|
||||
res, _ = super().__call__(
|
||||
img, return_ocr_result_in_table, img_idx=img_idx)
|
||||
return res
|
||||
|
@ -644,7 +641,7 @@ def main():
|
|||
|
||||
if not flag_pdf:
|
||||
if img is None:
|
||||
logger.error("error in loading image:{}".format(image_file))
|
||||
logger.error("error in loading image:{}".format(img_path))
|
||||
continue
|
||||
img_paths = [[img_path, img]]
|
||||
else:
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: &epoch_num 200
|
||||
log_smooth_window: 10
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/ser_layoutxlm_xfund_zh
|
||||
save_epoch_step: 2000
|
||||
# evaluation is run every 187 iterations after the 0th iteration
|
||||
eval_batch_step: [ 0, 187 ]
|
||||
cal_metric_during_train: False
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
seed: 2022
|
||||
infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
|
||||
save_res_path: ./output/ser_layoutxlm_xfund_zh/res
|
||||
|
||||
Architecture:
|
||||
model_type: kie
|
||||
algorithm: &algorithm "LayoutXLM"
|
||||
Transform:
|
||||
Backbone:
|
||||
name: LayoutXLMForSer
|
||||
pretrained: True
|
||||
checkpoints:
|
||||
num_classes: &num_classes 7
|
||||
|
||||
Loss:
|
||||
name: VQASerTokenLayoutLMLoss
|
||||
num_classes: *num_classes
|
||||
key: "backbone_out"
|
||||
|
||||
Optimizer:
|
||||
name: AdamW
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Linear
|
||||
learning_rate: 0.00005
|
||||
epochs: *epoch_num
|
||||
warmup_epoch: 2
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 0.00000
|
||||
|
||||
PostProcess:
|
||||
name: VQASerTokenLayoutLMPostProcess
|
||||
class_path: &class_path train_data/XFUND/class_list_xfun.txt
|
||||
|
||||
Metric:
|
||||
name: VQASerTokenMetric
|
||||
main_indicator: hmean
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/XFUND/zh_train/image
|
||||
label_file_list:
|
||||
- train_data/XFUND/zh_train/train.json
|
||||
ratio_list: [ 1.0 ]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: RGB
|
||||
channel_first: False
|
||||
- VQATokenLabelEncode: # Class handling label
|
||||
contains_re: False
|
||||
algorithm: *algorithm
|
||||
class_path: *class_path
|
||||
- VQATokenPad:
|
||||
max_seq_len: &max_seq_len 512
|
||||
return_attention_mask: True
|
||||
- VQASerTokenChunk:
|
||||
max_seq_len: *max_seq_len
|
||||
- Resize:
|
||||
size: [224,224]
|
||||
- NormalizeImage:
|
||||
scale: 1
|
||||
mean: [ 123.675, 116.28, 103.53 ]
|
||||
std: [ 58.395, 57.12, 57.375 ]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
drop_last: False
|
||||
batch_size_per_card: 8
|
||||
num_workers: 4
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/XFUND/zh_val/image
|
||||
label_file_list:
|
||||
- train_data/XFUND/zh_val/val.json
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: RGB
|
||||
channel_first: False
|
||||
- VQATokenLabelEncode: # Class handling label
|
||||
contains_re: False
|
||||
algorithm: *algorithm
|
||||
class_path: *class_path
|
||||
- VQATokenPad:
|
||||
max_seq_len: *max_seq_len
|
||||
return_attention_mask: True
|
||||
- VQASerTokenChunk:
|
||||
max_seq_len: *max_seq_len
|
||||
- Resize:
|
||||
size: [224,224]
|
||||
- NormalizeImage:
|
||||
scale: 1
|
||||
mean: [ 123.675, 116.28, 103.53 ]
|
||||
std: [ 58.395, 57.12, 57.375 ]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 8
|
||||
num_workers: 4
|
|
@ -13,7 +13,7 @@ train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
|
|||
null:null
|
||||
##
|
||||
trainer:norm_train
|
||||
norm_train:tools/train.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false
|
||||
norm_train:tools/train.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false
|
||||
pact_train:null
|
||||
fpgm_train:null
|
||||
distill_train:null
|
||||
|
@ -27,7 +27,7 @@ null:null
|
|||
===========================infer_params===========================
|
||||
Global.save_inference_dir:./output/
|
||||
Architecture.Backbone.checkpoints:
|
||||
norm_export:tools/export_model.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o
|
||||
norm_export:tools/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
|
||||
quant_export:
|
||||
fpgm_export:
|
||||
distill_export:null
|
||||
|
|
|
@ -37,8 +37,8 @@ export2:null
|
|||
infer_model:null
|
||||
infer_export:null
|
||||
infer_quant:False
|
||||
inference:ppstructure/table/predict_structure.py --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --output ./output/table --table_algorithm=TableMaster --table_max_len=480
|
||||
--use_gpu:True|False
|
||||
inference:ppstructure/table/predict_structure.py --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --output ./output/table --table_algorithm=TableMaster --table_max_len=480
|
||||
--use_gpu:True
|
||||
--enable_mkldnn:False
|
||||
--cpu_threads:6
|
||||
--rec_batch_num:1
|
||||
|
|
|
@ -21,7 +21,11 @@ model_name=$(func_parser_value "${lines[1]}")
|
|||
trainer_list=$(func_parser_value "${lines[14]}")
|
||||
|
||||
if [ ${MODE} = "benchmark_train" ];then
|
||||
pip install -r requirements.txt
|
||||
python_name_list=$(func_parser_value "${lines[2]}")
|
||||
array=(${python_name_list})
|
||||
python_name=${array[0]}
|
||||
${python_name} -m pip install -r requirements.txt
|
||||
${python_name} -m pip install git+https://github.com/LDOUBLEV/AutoLog
|
||||
if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0_det" || ${model_name} =~ "det_mv3_db_v2_0" ]];then
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
|
||||
rm -rf ./train_data/icdar2015
|
||||
|
@ -107,8 +111,8 @@ if [ ${MODE} = "benchmark_train" ];then
|
|||
cd ../
|
||||
fi
|
||||
if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
|
||||
pip install -r ppstructure/kie/requirements.txt
|
||||
pip install opencv-python -U
|
||||
${python_name} -m pip install -r ppstructure/kie/requirements.txt
|
||||
${python_name} -m pip install opencv-python -U
|
||||
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
|
||||
cd ./train_data/ && tar xf XFUND.tar
|
||||
# expand gt.txt 10 times
|
||||
|
@ -122,6 +126,11 @@ if [ ${MODE} = "benchmark_train" ];then
|
|||
fi
|
||||
|
||||
if [ ${MODE} = "lite_train_lite_infer" ];then
|
||||
python_name_list=$(func_parser_value "${lines[2]}")
|
||||
array=(${python_name_list})
|
||||
python_name=${array[0]}
|
||||
${python_name} -m pip install -r requirements.txt
|
||||
${python_name} -m pip install git+https://github.com/LDOUBLEV/AutoLog
|
||||
# pretrain lite train data
|
||||
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
|
||||
|
@ -212,6 +221,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
|
|||
if [ ${model_name} == "ch_ppocr_mobile_v2_0_rec_FPGM" ]; then
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar --no-check-certificate
|
||||
cd ./pretrain_models/ && tar xf ch_ppocr_mobile_v2.0_rec_train.tar && cd ../
|
||||
${python_name} -m pip install paddleslim
|
||||
fi
|
||||
if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_FPGM" ]; then
|
||||
${python_name} -m pip install paddleslim
|
||||
fi
|
||||
if [ ${model_name} == "det_mv3_east_v2_0" ]; then
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate
|
||||
|
@ -230,8 +243,8 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
|
|||
cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
|
||||
fi
|
||||
if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
|
||||
pip install -r ppstructure/kie/requirements.txt
|
||||
pip install opencv-python -U
|
||||
${python_name} -m pip install -r ppstructure/kie/requirements.txt
|
||||
${python_name} -m pip install opencv-python -U
|
||||
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
|
||||
cd ./train_data/ && tar xf XFUND.tar
|
||||
cd ../
|
||||
|
@ -639,6 +652,7 @@ if [ ${MODE} = "serving_infer" ];then
|
|||
${python_name} -m pip install paddle-serving-server-gpu
|
||||
${python_name} -m pip install paddle_serving_client
|
||||
${python_name} -m pip install paddle-serving-app
|
||||
${python_name} -m pip install git+https://github.com/LDOUBLEV/AutoLog
|
||||
# wget model
|
||||
if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_KL" ] || [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_KL" ] ; then
|
||||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/tipc_fake_model/ch_ppocr_mobile_v2.0_det_klquant_infer.tar --no-check-certificate
|
||||
|
|
Loading…
Reference in New Issue