From f01dbb5648a97af6a3160e5653affe5a0bb3dc4e Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 20 Dec 2021 14:23:36 +0000
Subject: [PATCH 01/35] add LayoutLM ser
---
ppstructure/vqa/README.md | 16 +++++++----
ppstructure/vqa/eval_ser.py | 47 +++++++++++++++++++++---------
ppstructure/vqa/infer_re.py | 9 ++++--
ppstructure/vqa/infer_ser.py | 47 +++++++++++++++++++-----------
ppstructure/vqa/infer_ser_e2e.py | 49 +++++++++++++++++++++-----------
ppstructure/vqa/losses.py | 35 +++++++++++++++++++++++
ppstructure/vqa/train_ser.py | 48 ++++++++++++++++++++++---------
ppstructure/vqa/utils.py | 2 ++
8 files changed, 185 insertions(+), 68 deletions(-)
create mode 100644 ppstructure/vqa/losses.py
diff --git a/ppstructure/vqa/README.md b/ppstructure/vqa/README.md
index 2216950e5..51a616c43 100644
--- a/ppstructure/vqa/README.md
+++ b/ppstructure/vqa/README.md
@@ -18,12 +18,13 @@ The DOC-VQA algorithms in PP-Structure are implemented on top of the PaddleNLP natural language processing library
## 1 Performance
-We evaluated the algorithms on the [XFUN](https://github.com/doc-analysis/XFUND) evaluation dataset; the performance is as follows
+We evaluated the algorithms on the Chinese subset of [XFUN](https://github.com/doc-analysis/XFUND); the performance is as follows
-|Task| f1 |Model download link|
-|:---:|:---:| :---:|
-|SER|0.9056| [link](https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar)|
-|RE|0.7113| [link](https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_re_pretrained.tar)|
+| Model | Task | f1 | Model download link |
+|:---:|:---:|:---:| :---:|
+| LayoutXLM | RE | 0.7113 | [link](https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_re_pretrained.tar) |
+| LayoutXLM | SER | 0.9056 | [link](https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar) |
+| LayoutLM | SER | 0.78 | [link](https://paddleocr.bj.bcebos.com/pplayout/LayoutLM_ser_pretrained.tar) |
@@ -135,6 +136,7 @@ wget https://paddleocr.bj.bcebos.com/dataset/XFUND.tar
```shell
python3.7 train_ser.py \
--model_name_or_path "layoutxlm-base-uncased" \
+ --ser_model_type "LayoutXLM" \
--train_data_dir "XFUND/zh_train/image" \
--train_label_path "XFUND/zh_train/xfun_normalize_train.json" \
--eval_data_dir "XFUND/zh_val/image" \
@@ -155,6 +157,7 @@ python3.7 train_ser.py \
```shell
python3.7 train_ser.py \
--model_name_or_path "model_path" \
+ --ser_model_type "LayoutXLM" \
--train_data_dir "XFUND/zh_train/image" \
--train_label_path "XFUND/zh_train/xfun_normalize_train.json" \
--eval_data_dir "XFUND/zh_val/image" \
@@ -175,6 +178,7 @@ python3.7 train_ser.py \
export CUDA_VISIBLE_DEVICES=0
python3 eval_ser.py \
--model_name_or_path "PP-Layout_v1.0_ser_pretrained/" \
+ --ser_model_type "LayoutXLM" \
--eval_data_dir "XFUND/zh_val/image" \
--eval_label_path "XFUND/zh_val/xfun_normalize_val.json" \
--per_gpu_eval_batch_size 8 \
@@ -190,6 +194,7 @@ python3 eval_ser.py \
export CUDA_VISIBLE_DEVICES=0
python3.7 infer_ser.py \
--model_name_or_path "./PP-Layout_v1.0_ser_pretrained/" \
+ --ser_model_type "LayoutXLM" \
--output_dir "output_res/" \
--infer_imgs "XFUND/zh_val/image/" \
--ocr_json_path "XFUND/zh_val/xfun_normalize_val.json"
@@ -203,6 +208,7 @@ python3.7 infer_ser.py \
export CUDA_VISIBLE_DEVICES=0
python3.7 infer_ser_e2e.py \
--model_name_or_path "./output/PP-Layout_v1.0_ser_pretrained/" \
+ --ser_model_type "LayoutXLM" \
--max_seq_length 512 \
--output_dir "output_res_e2e/" \
--infer_imgs "images/input/zh_val_0.jpg"
diff --git a/ppstructure/vqa/eval_ser.py b/ppstructure/vqa/eval_ser.py
index acf37452a..52eeb8a1d 100644
--- a/ppstructure/vqa/eval_ser.py
+++ b/ppstructure/vqa/eval_ser.py
@@ -29,11 +29,21 @@ import paddle
import numpy as np
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
+from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer, LayoutLMForTokenClassification
+
from xfun import XFUNDataset
+from losses import SERLoss
from utils import parse_args, get_bio_label_maps, print_arguments
from ppocr.utils.logging import get_logger
+MODELS = {
+ 'LayoutXLM':
+ (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification),
+ 'LayoutLM':
+ (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification)
+}
+
def eval(args):
logger = get_logger()
@@ -42,9 +52,9 @@ def eval(args):
label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
- tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
- model = LayoutXLMForTokenClassification.from_pretrained(
- args.model_name_or_path)
+ tokenizer_class, base_model_class, model_class = MODELS[args.ser_model_type]
+ tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
+ model = model_class.from_pretrained(args.model_name_or_path)
eval_dataset = XFUNDataset(
tokenizer,
@@ -65,8 +75,11 @@ def eval(args):
use_shared_memory=True,
collate_fn=None, )
- results, _ = evaluate(args, model, tokenizer, eval_dataloader, label2id_map,
- id2label_map, pad_token_label_id, logger)
+ loss_class = SERLoss(len(label2id_map))
+
+ results, _ = evaluate(args, model, tokenizer, loss_class, eval_dataloader,
+ label2id_map, id2label_map, pad_token_label_id,
+ logger)
logger.info(results)
@@ -74,6 +87,7 @@ def eval(args):
def evaluate(args,
model,
tokenizer,
+ loss_class,
eval_dataloader,
label2id_map,
id2label_map,
@@ -88,24 +102,29 @@ def evaluate(args,
model.eval()
for idx, batch in enumerate(eval_dataloader):
with paddle.no_grad():
+ if args.ser_model_type == 'LayoutLM':
+ if 'image' in batch:
+ batch.pop('image')
+ labels = batch.pop('labels')
outputs = model(**batch)
- tmp_eval_loss, logits = outputs[:2]
+ if args.ser_model_type == 'LayoutXLM':
+ outputs = outputs[0]
+ loss = loss_class(labels, outputs, batch['attention_mask'])
- tmp_eval_loss = tmp_eval_loss.mean()
+ loss = loss.mean()
if paddle.distributed.get_rank() == 0:
logger.info("[Eval]process: {}/{}, loss: {:.5f}".format(
- idx, len(eval_dataloader), tmp_eval_loss.numpy()[0]))
+ idx, len(eval_dataloader), loss.numpy()[0]))
- eval_loss += tmp_eval_loss.item()
+ eval_loss += loss.item()
nb_eval_steps += 1
if preds is None:
- preds = logits.numpy()
- out_label_ids = batch["labels"].numpy()
+ preds = outputs.numpy()
+ out_label_ids = labels.numpy()
else:
- preds = np.append(preds, logits.numpy(), axis=0)
- out_label_ids = np.append(
- out_label_ids, batch["labels"].numpy(), axis=0)
+ preds = np.append(preds, outputs.numpy(), axis=0)
+ out_label_ids = np.append(out_label_ids, labels.numpy(), axis=0)
eval_loss = eval_loss / nb_eval_steps
preds = np.argmax(preds, axis=2)
diff --git a/ppstructure/vqa/infer_re.py b/ppstructure/vqa/infer_re.py
index 139ade9aa..2ffa60f5d 100644
--- a/ppstructure/vqa/infer_re.py
+++ b/ppstructure/vqa/infer_re.py
@@ -56,7 +56,11 @@ def infer(args):
ocr_info_list = load_ocr(args.eval_data_dir, args.eval_label_path)
for idx, batch in enumerate(eval_dataloader):
- logger.info("[Infer] process: {}/{}".format(idx, len(eval_dataloader)))
+ save_img_path = os.path.join(
+ args.output_dir,
+ os.path.splitext(os.path.basename(img_path))[0] + "_re.jpg")
+ logger.info("[Infer] process: {}/{}, save_result to {}".format(
+ idx, len(eval_dataloader), save_img_path))
with paddle.no_grad():
outputs = model(**batch)
pred_relations = outputs['pred_relations']
@@ -85,8 +89,7 @@ def infer(args):
img = cv2.imread(image_path)
img_show = draw_re_results(img, result)
- save_path = os.path.join(args.output_dir, os.path.basename(image_path))
- cv2.imwrite(save_path, img_show)
+ cv2.imwrite(save_img_path, img_show)
def load_ocr(img_folder, json_path):
diff --git a/ppstructure/vqa/infer_ser.py b/ppstructure/vqa/infer_ser.py
index 3c7ad03aa..2b715d7b9 100644
--- a/ppstructure/vqa/infer_ser.py
+++ b/ppstructure/vqa/infer_ser.py
@@ -24,6 +24,14 @@ import paddle
# relative reference
from utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
+from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer, LayoutLMForTokenClassification
+
+MODELS = {
+ 'LayoutXLM':
+ (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification),
+ 'LayoutLM':
+ (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification)
+}
def pad_sentences(tokenizer,
@@ -217,10 +225,10 @@ def infer(args):
os.makedirs(args.output_dir, exist_ok=True)
# init token and model
- tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
- # model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
- model = LayoutXLMForTokenClassification.from_pretrained(
- args.model_name_or_path)
+ tokenizer_class, base_model_class, model_class = MODELS[args.ser_model_type]
+ tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
+ model = model_class.from_pretrained(args.model_name_or_path)
+
model.eval()
# load ocr results json
@@ -240,7 +248,10 @@ def infer(args):
"w",
encoding='utf-8') as fout:
for idx, img_path in enumerate(infer_imgs):
- print("process: [{}/{}]".format(idx, len(infer_imgs), img_path))
+ save_img_path = os.path.join(args.output_dir,
+ os.path.basename(img_path))
+ print("process: [{}/{}], save_result to {}".format(
+ idx, len(infer_imgs), save_img_path))
img = cv2.imread(img_path)
@@ -250,15 +261,21 @@ def infer(args):
ori_img=img,
ocr_info=ocr_info,
max_seq_len=args.max_seq_length)
+ if args.ser_model_type == 'LayoutLM':
+ preds = model(
+ input_ids=inputs["input_ids"],
+ bbox=inputs["bbox"],
+ token_type_ids=inputs["token_type_ids"],
+ attention_mask=inputs["attention_mask"])
+ elif args.ser_model_type == 'LayoutXLM':
+ preds = model(
+ input_ids=inputs["input_ids"],
+ bbox=inputs["bbox"],
+ image=inputs["image"],
+ token_type_ids=inputs["token_type_ids"],
+ attention_mask=inputs["attention_mask"])
+ preds = preds[0]
- outputs = model(
- input_ids=inputs["input_ids"],
- bbox=inputs["bbox"],
- image=inputs["image"],
- token_type_ids=inputs["token_type_ids"],
- attention_mask=inputs["attention_mask"])
-
- preds = outputs[0]
preds = postprocess(inputs["attention_mask"], preds,
args.label_map_path)
ocr_info = merge_preds_list_with_ocr_info(
@@ -271,9 +288,7 @@ def infer(args):
}, ensure_ascii=False) + "\n")
img_res = draw_ser_results(img, ocr_info)
- cv2.imwrite(
- os.path.join(args.output_dir, os.path.basename(img_path)),
- img_res)
+ cv2.imwrite(save_img_path, img_res)
return
diff --git a/ppstructure/vqa/infer_ser_e2e.py b/ppstructure/vqa/infer_ser_e2e.py
index a93a98ff6..05a029822 100644
--- a/ppstructure/vqa/infer_ser_e2e.py
+++ b/ppstructure/vqa/infer_ser_e2e.py
@@ -22,12 +22,20 @@ from PIL import Image
import paddle
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
+from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer, LayoutLMForTokenClassification
# relative reference
from utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps
from utils import pad_sentences, split_page, preprocess, postprocess, merge_preds_list_with_ocr_info
+MODELS = {
+ 'LayoutXLM':
+ (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification),
+ 'LayoutLM':
+ (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification)
+}
+
def trans_poly_to_bbox(poly):
x1 = np.min([p[0] for p in poly])
@@ -50,14 +58,15 @@ def parse_ocr_info_for_ser(ocr_result):
class SerPredictor(object):
def __init__(self, args):
-
+ self.args = args
self.max_seq_length = args.max_seq_length
# init ser token and model
- self.tokenizer = LayoutXLMTokenizer.from_pretrained(
- args.model_name_or_path)
- self.model = LayoutXLMForTokenClassification.from_pretrained(
+ tokenizer_class, base_model_class, model_class = MODELS[
+ args.ser_model_type]
+ self.tokenizer = tokenizer_class.from_pretrained(
args.model_name_or_path)
+ self.model = model_class.from_pretrained(args.model_name_or_path)
self.model.eval()
# init ocr_engine
@@ -89,14 +98,21 @@ class SerPredictor(object):
ocr_info=ocr_info,
max_seq_len=self.max_seq_length)
- outputs = self.model(
- input_ids=inputs["input_ids"],
- bbox=inputs["bbox"],
- image=inputs["image"],
- token_type_ids=inputs["token_type_ids"],
- attention_mask=inputs["attention_mask"])
+ if args.ser_model_type == 'LayoutLM':
+ preds = self.model(
+ input_ids=inputs["input_ids"],
+ bbox=inputs["bbox"],
+ token_type_ids=inputs["token_type_ids"],
+ attention_mask=inputs["attention_mask"])
+ elif args.ser_model_type == 'LayoutXLM':
+ preds = self.model(
+ input_ids=inputs["input_ids"],
+ bbox=inputs["bbox"],
+ image=inputs["image"],
+ token_type_ids=inputs["token_type_ids"],
+ attention_mask=inputs["attention_mask"])
+ preds = preds[0]
- preds = outputs[0]
preds = postprocess(inputs["attention_mask"], preds, self.id2label_map)
ocr_info = merge_preds_list_with_ocr_info(
ocr_info, inputs["segment_offset_id"], preds,
@@ -118,7 +134,11 @@ if __name__ == "__main__":
"w",
encoding='utf-8') as fout:
for idx, img_path in enumerate(infer_imgs):
- print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path))
+ save_img_path = os.path.join(
+ args.output_dir,
+ os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg")
+ print("process: [{}/{}], save_result to {}".format(
+ idx, len(infer_imgs), save_img_path))
img = cv2.imread(img_path)
@@ -129,7 +149,4 @@ if __name__ == "__main__":
}, ensure_ascii=False) + "\n")
img_res = draw_ser_results(img, result)
- cv2.imwrite(
- os.path.join(args.output_dir,
- os.path.splitext(os.path.basename(img_path))[0] +
- "_ser.jpg"), img_res)
+ cv2.imwrite(save_img_path, img_res)
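
The branching above, repeated in infer_ser.py and infer_ser_e2e.py, exists because the two backbones take different inputs: LayoutXLM also consumes visual features, while LayoutLM uses only text and layout. A minimal sketch of the dispatch (the helper name run_ser_model is hypothetical, not part of the patch):

```python
def run_ser_model(model, inputs, ser_model_type):
    """Call a SER model with exactly the inputs its backbone expects."""
    common = dict(
        input_ids=inputs["input_ids"],
        bbox=inputs["bbox"],
        token_type_ids=inputs["token_type_ids"],
        attention_mask=inputs["attention_mask"])
    if ser_model_type == "LayoutLM":
        return model(**common)  # LayoutLM takes no image tensor
    # LayoutXLM additionally consumes the image features
    return model(image=inputs["image"], **common)
```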
diff --git a/ppstructure/vqa/losses.py b/ppstructure/vqa/losses.py
new file mode 100644
index 000000000..e8dad01c3
--- /dev/null
+++ b/ppstructure/vqa/losses.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import nn
+
+
+class SERLoss(nn.Layer):
+ def __init__(self, num_classes):
+ super().__init__()
+ self.loss_class = nn.CrossEntropyLoss()
+ self.num_classes = num_classes
+ self.ignore_index = self.loss_class.ignore_index
+
+ def forward(self, labels, outputs, attention_mask):
+ if attention_mask is not None:
+ active_loss = attention_mask.reshape([-1, ]) == 1
+ active_outputs = outputs.reshape(
+ [-1, self.num_classes])[active_loss]
+ active_labels = labels.reshape([-1, ])[active_loss]
+ loss = self.loss_class(active_outputs, active_labels)
+ else:
+ loss = self.loss_class(
+ outputs.reshape([-1, self.num_classes]), labels.reshape([-1, ]))
+ return loss
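
For reference, a minimal sketch of how the new SERLoss is meant to be called, matching its use in eval_ser.py and train_ser.py (the shapes and dummy values below are illustrative, not from the patch):

```python
import paddle

from losses import SERLoss  # the module added above

num_classes = 7  # e.g. len(label2id_map) for the XFUN BIO labels
loss_class = SERLoss(num_classes)

batch_size, seq_len = 2, 4
logits = paddle.randn([batch_size, seq_len, num_classes])       # token-classification logits
labels = paddle.randint(0, num_classes, [batch_size, seq_len])  # BIO label ids
attention_mask = paddle.to_tensor([[1, 1, 1, 0],
                                   [1, 1, 0, 0]])               # 1 = real token, 0 = padding

# only the five unmasked tokens contribute to the cross-entropy
loss = loss_class(labels, logits, attention_mask)
```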
diff --git a/ppstructure/vqa/train_ser.py b/ppstructure/vqa/train_ser.py
index 6791cea89..2670ef9ee 100644
--- a/ppstructure/vqa/train_ser.py
+++ b/ppstructure/vqa/train_ser.py
@@ -29,11 +29,21 @@ import paddle
import numpy as np
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
+from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer, LayoutLMForTokenClassification
+
from xfun import XFUNDataset
from utils import parse_args, get_bio_label_maps, print_arguments, set_seed
from eval_ser import evaluate
+from losses import SERLoss
from ppocr.utils.logging import get_logger
+MODELS = {
+ 'LayoutXLM':
+ (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification),
+ 'LayoutLM':
+ (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification)
+}
+
def train(args):
os.makedirs(args.output_dir, exist_ok=True)
@@ -44,22 +54,24 @@ def train(args):
print_arguments(args, logger)
label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
- pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
+ loss_class = SERLoss(len(label2id_map))
+
+ pad_token_label_id = loss_class.ignore_index
# dist mode
if distributed:
paddle.distributed.init_parallel_env()
- tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
+ tokenizer_class, base_model_class, model_class = MODELS[args.ser_model_type]
+ tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
if not args.resume:
- model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
- model = LayoutXLMForTokenClassification(
- model, num_classes=len(label2id_map), dropout=None)
+ base_model = base_model_class.from_pretrained(args.model_name_or_path)
+ model = model_class(
+ base_model, num_classes=len(label2id_map), dropout=None)
logger.info('train from scratch')
else:
logger.info('resume from {}'.format(args.model_name_or_path))
- model = LayoutXLMForTokenClassification.from_pretrained(
- args.model_name_or_path)
+ model = model_class.from_pretrained(args.model_name_or_path)
# dist mode
if distributed:
@@ -153,12 +165,19 @@ def train(args):
for step, batch in enumerate(train_dataloader):
train_reader_cost += time.time() - reader_start
+ if args.ser_model_type == 'LayoutLM':
+ if 'image' in batch:
+ batch.pop('image')
+ labels = batch.pop('labels')
+
train_start = time.time()
outputs = model(**batch)
train_run_cost += time.time() - train_start
+ if args.ser_model_type == 'LayoutXLM':
+ outputs = outputs[0]
+ loss = loss_class(labels, outputs, batch['attention_mask'])
# model outputs are always tuple in ppnlp (see doc)
- loss = outputs[0]
loss = loss.mean()
loss.backward()
tr_loss += loss.item()
@@ -166,7 +185,7 @@ def train(args):
lr_scheduler.step() # Update learning rate schedule
optimizer.clear_grad()
global_step += 1
- total_samples += batch['image'].shape[0]
+ total_samples += batch['input_ids'].shape[0]
if rank == 0 and step % print_step == 0:
logger.info(
@@ -186,9 +205,9 @@ def train(args):
if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training:
# Log metrics
# Only evaluate when single GPU otherwise metrics may not average well
- results, _ = evaluate(args, model, tokenizer, eval_dataloader,
- label2id_map, id2label_map,
- pad_token_label_id, logger)
+ results, _ = evaluate(args, model, tokenizer, loss_class,
+ eval_dataloader, label2id_map,
+ id2label_map, pad_token_label_id, logger)
if best_metrics is None or results["f1"] >= best_metrics["f1"]:
best_metrics = copy.deepcopy(results)
@@ -201,7 +220,8 @@ def train(args):
tokenizer.save_pretrained(output_dir)
paddle.save(args,
os.path.join(output_dir, "training_args.bin"))
- logger.info("Saving model checkpoint to %s", output_dir)
+ logger.info("Saving model checkpoint to {}".format(
+ output_dir))
logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format(
epoch_id, args.num_train_epochs, step,
@@ -219,7 +239,7 @@ def train(args):
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
paddle.save(args, os.path.join(output_dir, "training_args.bin"))
- logger.info("Saving model checkpoint to %s", output_dir)
+ logger.info("Saving model checkpoint to {}".format(output_dir))
return global_step, tr_loss / global_step
diff --git a/ppstructure/vqa/utils.py b/ppstructure/vqa/utils.py
index 414317c0d..b9f2edc86 100644
--- a/ppstructure/vqa/utils.py
+++ b/ppstructure/vqa/utils.py
@@ -350,6 +350,8 @@ def parse_args():
# yapf: disable
parser.add_argument("--model_name_or_path",
default=None, type=str, required=True,)
+ parser.add_argument("--ser_model_type",
+ default='LayoutXLM', type=str)
parser.add_argument("--re_model_name_or_path",
default=None, type=str, required=False,)
parser.add_argument("--train_data_dir", default=None,
From 9131c4a7acc12ba79c7a60aef72a94abb5d730eb Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 20 Dec 2021 14:38:49 +0000
Subject: [PATCH 02/35] add LayoutLM ser
---
ppstructure/vqa/README.md | 10 ++---
ppstructure/vqa/infer.sh | 61 +++++++++++++++++++++++++++++
ppstructure/vqa/infer_re.py | 10 ++---
ppstructure/vqa/infer_ser_e2e.py | 4 +-
ppstructure/vqa/infer_ser_re_e2e.py | 11 +++---
5 files changed, 79 insertions(+), 17 deletions(-)
create mode 100644 ppstructure/vqa/infer.sh
diff --git a/ppstructure/vqa/README.md b/ppstructure/vqa/README.md
index 51a616c43..975139c79 100644
--- a/ppstructure/vqa/README.md
+++ b/ppstructure/vqa/README.md
@@ -195,7 +195,7 @@ export CUDA_VISIBLE_DEVICES=0
python3.7 infer_ser.py \
--model_name_or_path "./PP-Layout_v1.0_ser_pretrained/" \
--ser_model_type "LayoutXLM" \
- --output_dir "output_res/" \
+ --output_dir "output/ser/" \
--infer_imgs "XFUND/zh_val/image/" \
--ocr_json_path "XFUND/zh_val/xfun_normalize_val.json"
```
@@ -210,7 +210,7 @@ python3.7 infer_ser_e2e.py \
--model_name_or_path "./output/PP-Layout_v1.0_ser_pretrained/" \
--ser_model_type "LayoutXLM" \
--max_seq_length 512 \
- --output_dir "output_res_e2e/" \
+ --output_dir "output/ser_e2e/" \
--infer_imgs "images/input/zh_val_0.jpg"
```
@@ -284,7 +284,7 @@ python3 eval_re.py \
--eval_data_dir "XFUND/zh_val/image" \
--eval_label_path "XFUND/zh_val/xfun_normalize_val.json" \
--label_map_path 'labels/labels_ser.txt' \
- --output_dir "output/re_test/" \
+ --output_dir "output/re/" \
--per_gpu_eval_batch_size 8 \
--num_workers 8 \
--seed 2048
@@ -302,7 +302,7 @@ python3 infer_re.py \
--eval_data_dir "XFUND/zh_val/image" \
--eval_label_path "XFUND/zh_val/xfun_normalize_val.json" \
--label_map_path 'labels/labels_ser.txt' \
- --output_dir "output_res" \
+ --output_dir "output/re/" \
--per_gpu_eval_batch_size 1 \
--seed 2048
```
@@ -317,7 +317,7 @@ python3.7 infer_ser_re_e2e.py \
--model_name_or_path "./PP-Layout_v1.0_ser_pretrained/" \
--re_model_name_or_path "./PP-Layout_v1.0_re_pretrained/" \
--max_seq_length 512 \
- --output_dir "output_ser_re_e2e_train/" \
+ --output_dir "output/ser_re_e2e/" \
--infer_imgs "images/input/zh_val_21.jpg"
```
diff --git a/ppstructure/vqa/infer.sh b/ppstructure/vqa/infer.sh
new file mode 100644
index 000000000..2cd1cea44
--- /dev/null
+++ b/ppstructure/vqa/infer.sh
@@ -0,0 +1,61 @@
+export CUDA_VISIBLE_DEVICES=6
+# python3.7 infer_ser_e2e.py \
+# --model_name_or_path "output/ser_distributed/best_model" \
+# --max_seq_length 512 \
+# --output_dir "output_res_e2e/" \
+# --infer_imgs "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/zh.val/zh_val_0.jpg"
+
+
+# python3.7 infer_ser_re_e2e.py \
+# --model_name_or_path "output/ser_distributed/best_model" \
+# --re_model_name_or_path "output/re_test/best_model" \
+# --max_seq_length 512 \
+# --output_dir "output_ser_re_e2e_train/" \
+# --infer_imgs "images/input/zh_val_21.jpg"
+
+# python3.7 infer_ser.py \
+# --model_name_or_path "output/ser_LayoutLM/best_model" \
+# --ser_model_type "LayoutLM" \
+# --output_dir "ser_LayoutLM/" \
+# --infer_imgs "images/input/zh_val_21.jpg" \
+# --ocr_json_path "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/xfun_normalize_val.json"
+
+python3.7 infer_ser.py \
+ --model_name_or_path "output/ser_new/best_model" \
+ --ser_model_type "LayoutXLM" \
+ --output_dir "ser_new/" \
+ --infer_imgs "images/input/zh_val_21.jpg" \
+ --ocr_json_path "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/xfun_normalize_val.json"
+
+# python3.7 infer_ser_e2e.py \
+# --model_name_or_path "output/ser_new/best_model" \
+# --ser_model_type "LayoutXLM" \
+# --max_seq_length 512 \
+# --output_dir "output/ser_new/" \
+# --infer_imgs "images/input/zh_val_0.jpg"
+
+
+# python3.7 infer_ser_e2e.py \
+# --model_name_or_path "output/ser_LayoutLM/best_model" \
+# --ser_model_type "LayoutLM" \
+# --max_seq_length 512 \
+# --output_dir "output/ser_LayoutLM/" \
+# --infer_imgs "images/input/zh_val_0.jpg"
+
+# python3 infer_re.py \
+# --model_name_or_path "/ssd1/zhoujun20/VQA/PaddleOCR/ppstructure/vqa/output/re_test/best_model/" \
+# --max_seq_length 512 \
+# --eval_data_dir "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/zh.val" \
+# --eval_label_path "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/xfun_normalize_val.json" \
+# --label_map_path 'labels/labels_ser.txt' \
+# --output_dir "output_res" \
+# --per_gpu_eval_batch_size 1 \
+# --seed 2048
+
+# python3.7 infer_ser_re_e2e.py \
+# --model_name_or_path "output/ser_LayoutLM/best_model" \
+# --ser_model_type "LayoutLM" \
+# --re_model_name_or_path "output/re_new/best_model" \
+# --max_seq_length 512 \
+# --output_dir "output_ser_re_e2e/" \
+# --infer_imgs "images/input/zh_val_21.jpg"
\ No newline at end of file
diff --git a/ppstructure/vqa/infer_re.py b/ppstructure/vqa/infer_re.py
index 2ffa60f5d..98c61bacc 100644
--- a/ppstructure/vqa/infer_re.py
+++ b/ppstructure/vqa/infer_re.py
@@ -56,19 +56,19 @@ def infer(args):
ocr_info_list = load_ocr(args.eval_data_dir, args.eval_label_path)
for idx, batch in enumerate(eval_dataloader):
+ ocr_info = ocr_info_list[idx]
+ image_path = ocr_info['image_path']
+ ocr_info = ocr_info['ocr_info']
+
save_img_path = os.path.join(
args.output_dir,
- os.path.splitext(os.path.basename(img_path))[0] + "_re.jpg")
+ os.path.splitext(os.path.basename(image_path))[0] + "_re.jpg")
logger.info("[Infer] process: {}/{}, save_result to {}".format(
idx, len(eval_dataloader), save_img_path))
with paddle.no_grad():
outputs = model(**batch)
pred_relations = outputs['pred_relations']
- ocr_info = ocr_info_list[idx]
- image_path = ocr_info['image_path']
- ocr_info = ocr_info['ocr_info']
-
        # use the entity info to decode tokens, then filter out the unneeded ocr_info
ocr_info = filter_bg_by_txt(ocr_info, batch, tokenizer)
diff --git a/ppstructure/vqa/infer_ser_e2e.py b/ppstructure/vqa/infer_ser_e2e.py
index 05a029822..bceb3434b 100644
--- a/ppstructure/vqa/infer_ser_e2e.py
+++ b/ppstructure/vqa/infer_ser_e2e.py
@@ -98,13 +98,13 @@ class SerPredictor(object):
ocr_info=ocr_info,
max_seq_len=self.max_seq_length)
- if args.ser_model_type == 'LayoutLM':
+ if self.args.ser_model_type == 'LayoutLM':
preds = self.model(
input_ids=inputs["input_ids"],
bbox=inputs["bbox"],
token_type_ids=inputs["token_type_ids"],
attention_mask=inputs["attention_mask"])
- elif args.ser_model_type == 'LayoutXLM':
+ elif self.args.ser_model_type == 'LayoutXLM':
preds = self.model(
input_ids=inputs["input_ids"],
bbox=inputs["bbox"],
diff --git a/ppstructure/vqa/infer_ser_re_e2e.py b/ppstructure/vqa/infer_ser_re_e2e.py
index 23737406d..a6316b625 100644
--- a/ppstructure/vqa/infer_ser_re_e2e.py
+++ b/ppstructure/vqa/infer_ser_re_e2e.py
@@ -117,7 +117,11 @@ if __name__ == "__main__":
"w",
encoding='utf-8') as fout:
for idx, img_path in enumerate(infer_imgs):
- print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path))
+ save_img_path = os.path.join(
+ args.output_dir,
+ os.path.splitext(os.path.basename(img_path))[0] + "_re.jpg")
+ print("process: [{}/{}], save_result to {}".format(
+ idx, len(infer_imgs), save_img_path))
img = cv2.imread(img_path)
@@ -128,7 +132,4 @@ if __name__ == "__main__":
}, ensure_ascii=False) + "\n")
img_res = draw_re_results(img, result)
- cv2.imwrite(
- os.path.join(args.output_dir,
- os.path.splitext(os.path.basename(img_path))[0] +
- "_re.jpg"), img_res)
+ cv2.imwrite(save_img_path, img_res)
From 11d6814f365b2e881adbdedc9865f86dae3022a9 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 20 Dec 2021 14:47:43 +0000
Subject: [PATCH 03/35] rm _
---
ppstructure/vqa/infer_re.py | 2 +-
ppstructure/vqa/infer_ser.py | 2 +-
ppstructure/vqa/infer_ser_e2e.py | 2 +-
ppstructure/vqa/infer_ser_re_e2e.py | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/ppstructure/vqa/infer_re.py b/ppstructure/vqa/infer_re.py
index 98c61bacc..7937700a7 100644
--- a/ppstructure/vqa/infer_re.py
+++ b/ppstructure/vqa/infer_re.py
@@ -63,7 +63,7 @@ def infer(args):
save_img_path = os.path.join(
args.output_dir,
os.path.splitext(os.path.basename(image_path))[0] + "_re.jpg")
- logger.info("[Infer] process: {}/{}, save_result to {}".format(
+ logger.info("[Infer] process: {}/{}, save result to {}".format(
idx, len(eval_dataloader), save_img_path))
with paddle.no_grad():
outputs = model(**batch)
diff --git a/ppstructure/vqa/infer_ser.py b/ppstructure/vqa/infer_ser.py
index 2b715d7b9..7994b5449 100644
--- a/ppstructure/vqa/infer_ser.py
+++ b/ppstructure/vqa/infer_ser.py
@@ -250,7 +250,7 @@ def infer(args):
for idx, img_path in enumerate(infer_imgs):
save_img_path = os.path.join(args.output_dir,
os.path.basename(img_path))
- print("process: [{}/{}], save_result to {}".format(
+ print("process: [{}/{}], save result to {}".format(
idx, len(infer_imgs), save_img_path))
img = cv2.imread(img_path)
diff --git a/ppstructure/vqa/infer_ser_e2e.py b/ppstructure/vqa/infer_ser_e2e.py
index bceb3434b..6bb024750 100644
--- a/ppstructure/vqa/infer_ser_e2e.py
+++ b/ppstructure/vqa/infer_ser_e2e.py
@@ -137,7 +137,7 @@ if __name__ == "__main__":
save_img_path = os.path.join(
args.output_dir,
os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg")
- print("process: [{}/{}], save_result to {}".format(
+ print("process: [{}/{}], save result to {}".format(
idx, len(infer_imgs), save_img_path))
img = cv2.imread(img_path)
diff --git a/ppstructure/vqa/infer_ser_re_e2e.py b/ppstructure/vqa/infer_ser_re_e2e.py
index a6316b625..32d8850a1 100644
--- a/ppstructure/vqa/infer_ser_re_e2e.py
+++ b/ppstructure/vqa/infer_ser_re_e2e.py
@@ -120,7 +120,7 @@ if __name__ == "__main__":
save_img_path = os.path.join(
args.output_dir,
os.path.splitext(os.path.basename(img_path))[0] + "_re.jpg")
- print("process: [{}/{}], save_result to {}".format(
+ print("process: [{}/{}], save result to {}".format(
idx, len(infer_imgs), save_img_path))
img = cv2.imread(img_path)
From bbcabd8145306ea84e1489202c88315981de1ed1 Mon Sep 17 00:00:00 2001
From: changy1105 <18392600593@163.com>
Date: Mon, 20 Dec 2021 22:54:41 +0800
Subject: [PATCH 04/35] [TIPC] Add js infer test
---
.gitignore | 2 +
test_tipc/docs/test_inference_js.md | 50 +++++++++++
test_tipc/prepare_js.sh | 92 ++++++++++++++++++++
test_tipc/test_inference_js.sh | 8 ++
test_tipc/web/expect.json | 20 +++++
test_tipc/web/index.html | 13 +++
test_tipc/web/index.test.js | 82 ++++++++++++++++++
test_tipc/web/jest-puppeteer.config.js | 14 ++++
test_tipc/web/jest.config.js | 111 +++++++++++++++++++++++++
test_tipc/web/test.jpg | Bin 0 -> 287898 bytes
10 files changed, 392 insertions(+)
create mode 100644 test_tipc/docs/test_inference_js.md
create mode 100644 test_tipc/prepare_js.sh
create mode 100644 test_tipc/test_inference_js.sh
create mode 100644 test_tipc/web/expect.json
create mode 100644 test_tipc/web/index.html
create mode 100644 test_tipc/web/index.test.js
create mode 100644 test_tipc/web/jest-puppeteer.config.js
create mode 100644 test_tipc/web/jest.config.js
create mode 100644 test_tipc/web/test.jpg
diff --git a/.gitignore b/.gitignore
index 9d85e7a8c..caf886a2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,5 @@ paddleocr.egg-info/
/deploy/android_demo/app/PaddleLite/
/deploy/android_demo/app/.cxx/
/deploy/android_demo/app/cache/
+test_tipc/web/models/
+test_tipc/web/node_modules/
diff --git a/test_tipc/docs/test_inference_js.md b/test_tipc/docs/test_inference_js.md
new file mode 100644
index 000000000..c0b7d653a
--- /dev/null
+++ b/test_tipc/docs/test_inference_js.md
@@ -0,0 +1,50 @@
+# Basic web-side inference test
+
+Web-side testing is end-to-end testing built on Jest-Puppeteer: Puppeteer drives Chrome through the inference flow, and Jest drives the test flow.
+>Puppeteer is a Node library that provides a high-level API to control Chromium or Chrome over the DevTools protocol.
+>Jest is a JavaScript testing framework designed to ensure the correctness of any JavaScript code.
+#### Environment preparation
+
+* Install Node (which includes npm) (https://nodejs.org/zh-cn/download/)
+* Confirm the installation succeeded by running the following on the command line
+```sh
+# printing the installed node version number means the installation succeeded
+node -v
+```
+* Confirm that npm is installed
+```sh
+# npm is installed together with node and usually needs no separate installation
+# printing the installed npm version number means the installation succeeded
+npm -v
+```
+
+#### Usage
+```sh
+# prepare the web test environment
+bash test_tipc/prepare_js.sh 'js_infer'
+# run the web inference test
+bash test_tipc/test_inference_js.sh
+```
+
+#### Workflow design
+
+###### paddlejs prepare
+ 1. Check whether node and npm are installed
+ 2. Download the test models. The current detection model is ch_PP-OCRv2_det_infer and the recognition model is ch_PP-OCRv2_rec_infer [1, 3, 32, 320]. To use other models, put the model files directly under the test_tipc/web/models/ directory.
+ - text detection model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+ - text recognition model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
+ - text recognition model [1, 3, 32, 320]: https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
+ - for reasonably accurate recognition, the text recognition model must be exported as a static model with input shape [1, 3, 32, 320]
+ 3. Convert the models: model.pdmodel and model.pdiparams are converted to model.json and chunk.dat (the detection model is saved to test_tipc/web/models/ch_PP-OCRv2/det, the recognition model to test_tipc/web/models/ch_PP-OCRv2/rec)
+ 4. Install the latest ocr sdk, @paddlejs-models/ocr@latest
+ 5. Install the test dependencies puppeteer, jest, and jest-puppeteer; if they are already installed, they are not installed a second time
+
+ ###### paddlejs infer test
+ 1. Jest runs the server command `python3 -m http.server 9811` to start a local server
+ 2. The Jest test run starts, drives Chrome via the jest-puppeteer plugin, and loads the @paddlejs-models/ocr script to perform the inference flow
+ 3. The test case compares the text recognized from the source image with the expected text (expect.json); passing requires two criteria (see the sketch after this section):
+ * compared character by character, the recognized text differs from the expected text by no more than **10 characters**;
+ * the text of every recognized box is compared with the expected text for similarity, and the similarity must be at least 0.9 (fully identical text has similarity 1).
+
+ The test is considered passed only when both criteria are met.
+
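+A rough Python sketch of the two pass criteria (illustrative only: the real check lives in index.test.js, and the helper name is_pass and the use of difflib are assumptions, not the test's actual code):
+
+```python
+import difflib
+
+def is_pass(expected_lines, actual_lines, max_char_diff=10, min_ratio=0.9):
+    # criterion 1: the concatenated text differs by at most max_char_diff characters
+    expected, actual = "".join(expected_lines), "".join(actual_lines)
+    opcodes = difflib.SequenceMatcher(None, expected, actual).get_opcodes()
+    char_diff = sum(max(i2 - i1, j2 - j1)
+                    for tag, i1, i2, j1, j2 in opcodes if tag != "equal")
+    # criterion 2: every text box is at least min_ratio similar to its expected line
+    boxes_ok = all(difflib.SequenceMatcher(None, e, a).ratio() >= min_ratio
+                   for e, a in zip(expected_lines, actual_lines))
+    return char_diff <= max_char_diff and boxes_ok
+```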
diff --git a/test_tipc/prepare_js.sh b/test_tipc/prepare_js.sh
new file mode 100644
index 000000000..7bcdeb35a
--- /dev/null
+++ b/test_tipc/prepare_js.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+set -o errexit
+set -o nounset
+shopt -s extglob
+
+# paddlejs prepare main flow
+# 1. Check whether node and npm are installed
+# 2. Download the test models. The current detection model is ch_PP-OCRv2_det_infer and the recognition model is ch_PP-OCRv2_rec_infer [1, 3, 32, 320]. To use other models, put the model files directly under the test_tipc/web/models/ directory.
+# - text detection model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+# - text recognition model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
+# - text recognition model [1, 3, 32, 320]: https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
+# - for reasonably accurate recognition, the text recognition model must be exported as a static model with input shape [1, 3, 32, 320]
+# 3. Convert the models: model.pdmodel and model.pdiparams are converted to model.json and chunk.dat (the detection model is saved to test_tipc/web/models/ch_PP-OCRv2/det, the recognition model to test_tipc/web/models/ch_PP-OCRv2/rec)
+# 4. Install the latest ocr sdk, @paddlejs-models/ocr@latest
+# 5. Install the test dependencies puppeteer, jest, and jest-puppeteer; if they are already installed, they are not installed a second time
+
+# check whether node is installed
+if ! type node >/dev/null 2>&1; then
+    echo -e "\033[31m node is not installed \033[0m"
+    exit
+fi
+
+# check whether npm is installed
+if ! type npm >/dev/null 2>&1; then
+    echo -e "\033[31m npm is not installed \033[0m"
+    exit
+fi
+
+# MODE must be 'js_infer'
+MODE=$1
+# in js_infer mode, download the model files and convert them for web inference
+if [ ${MODE} != "js_infer" ];then
+ echo "Please change mode to 'js_infer'"
+ exit
+fi
+
+
+# saved_model_name
+det_saved_model_name=ch_PP-OCRv2_det_infer
+rec_saved_model_name=ch_PP-OCRv2_rec_infer
+
+# model_path
+model_path=test_tipc/web/models/
+
+rm -rf $model_path
+
+echo ${model_path}${det_saved_model_name}
+echo ${model_path}${rec_saved_model_name}
+
+# download ocr_det inference model
+wget -nc -P $model_path https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+cd $model_path && tar xf ch_PP-OCRv2_det_infer.tar && cd ../../../
+
+# download ocr_rec inference model
+wget -nc -P $model_path https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
+cd $model_path && tar xf ch_PP-OCRv2_rec_infer.tar && cd ../../../
+
+MYDIR=`pwd`
+echo $MYDIR
+
+pip3 install paddlejsconverter
+
+# convert inference models to web models: model.json and chunk.dat
+paddlejsconverter \
+ --modelPath=$model_path$det_saved_model_name/inference.pdmodel \
+ --paramPath=$model_path$det_saved_model_name/inference.pdiparams \
+ --outputDir=$model_path$det_saved_model_name/ \
+
+paddlejsconverter \
+ --modelPath=$model_path$rec_saved_model_name/inference.pdmodel \
+ --paramPath=$model_path$rec_saved_model_name/inference.pdiparams \
+ --outputDir=$model_path$rec_saved_model_name/ \
+
+# always install latest ocr sdk
+cd test_tipc/web
+echo -e "\033[33m Installing the latest ocr sdk... \033[0m"
+npm install @paddlejs-models/ocr@latest
+npm info @paddlejs-models/ocr
+echo -e "\033[32m The latest ocr sdk has been installed. \033[0m"
+
+# install dependencies
+if [ `npm list --depth 0 | grep puppeteer | wc -l` -ne 0 ] && [ `npm list --depth 0 | grep jest | wc -l` -ne 0 ];then
+    echo -e "\033[32m Dependencies are already installed \033[0m"
+else
+    echo -e "\033[33m Installing dependencies ... \033[0m"
+    npm install jest jest-puppeteer puppeteer
+    echo -e "\033[32m Dependencies installed. \033[0m"
+fi
+
+# del package-lock.json
+rm package-lock.json
diff --git a/test_tipc/test_inference_js.sh b/test_tipc/test_inference_js.sh
new file mode 100644
index 000000000..e96b18752
--- /dev/null
+++ b/test_tipc/test_inference_js.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -o errexit
+set -o nounset
+
+cd test_tipc/web
+# run ocr test in chrome
+./node_modules/.bin/jest --config ./jest.config.js
diff --git a/test_tipc/web/expect.json b/test_tipc/web/expect.json
new file mode 100644
index 000000000..a60c80a7c
--- /dev/null
+++ b/test_tipc/web/expect.json
@@ -0,0 +1,20 @@
+{
+ "text": [
+ "纯臻营养护发素",
+ "产品信息/参数",
+ "(45元/每公斤,100公斤起订)",
+ "每瓶22元,1000瓶起订)",
+ "【品牌】:代加工方式/OEMODM",
+ "【品名】:纯臻营养护发素",
+ "【产品编号】:YM-X-3011",
+ "ODMOEM",
+ "【净含量】:220ml",
+ "【适用人群】:适合所有肤质",
+ "【主要成分】:鲸蜡硬脂醇、燕麦β-葡聚",
+ "糖、椰油酰胺丙基甜菜碱、泛醌",
+ "(成品包材)",
+ "【主要功能】:可紧致头发磷层,从而达到",
+ "即时持久改善头发光泽的效果,给干燥的头",
+ "发足够的滋养"
+ ]
+}
diff --git a/test_tipc/web/index.html b/test_tipc/web/index.html
new file mode 100644
index 000000000..39921fbf0
--- /dev/null
+++ b/test_tipc/web/index.html
@@ -0,0 +1,13 @@
+
+
+