2021-06-21 20:20:25 +08:00
|
|
|
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2020-05-10 16:26:57 +08:00
|
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
from __future__ import division
|
|
|
|
|
from __future__ import print_function
|
|
|
|
|
|
2020-10-13 17:13:33 +08:00
|
|
|
|
import os
|
2020-05-10 16:26:57 +08:00
|
|
|
|
import sys
|
2021-04-27 10:32:17 +08:00
|
|
|
|
import platform
|
2020-05-10 16:26:57 +08:00
|
|
|
|
import yaml
|
|
|
|
|
import time
|
2022-02-01 17:46:42 +08:00
|
|
|
|
import datetime
|
2020-10-13 17:13:33 +08:00
|
|
|
|
import paddle
|
|
|
|
|
import paddle.distributed as dist
|
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
|
|
|
|
|
2020-05-10 16:26:57 +08:00
|
|
|
|
from ppocr.utils.stats import TrainingStats
|
|
|
|
|
from ppocr.utils.save_load import save_model
|
2022-02-01 17:46:42 +08:00
|
|
|
|
from ppocr.utils.utility import print_dict, AverageMeter
|
2020-11-04 20:43:27 +08:00
|
|
|
|
from ppocr.utils.logging import get_logger
|
2022-04-15 14:44:22 +08:00
|
|
|
|
from ppocr.utils.loggers import VDLLogger, WandbLogger, Loggers
|
2021-09-28 10:28:25 +08:00
|
|
|
|
from ppocr.utils import profiler
|
2020-11-04 20:43:27 +08:00
|
|
|
|
from ppocr.data import build_dataloader
|
2020-05-10 16:26:57 +08:00
|
|
|
|
|
2020-11-05 15:13:36 +08:00
|
|
|
|
|
2020-05-10 16:26:57 +08:00
|
|
|
|
class ArgsParser(ArgumentParser):
    """
    Command-line parser for the training / evaluation entry points.

    Recognised options:
        -c / --config            path of the configuration file (required)
        -o / --opt               one or more ``key=value`` overrides
        -p / --profiler_options  profiler option string
    """

    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument(
            "-o", "--opt", nargs='+', help="set configuration options")
        self.add_argument(
            '-p',
            '--profiler_options',
            type=str,
            default=None,
            help='The option of profiler, which should be in format '
            '"key1=value1;key2=value2;key3=value3".')

    def parse_args(self, argv=None):
        """
        Parse ``argv`` and convert the ``--opt`` list into a dict.

        Args:
            argv (list|None): Arguments to parse; None lets argparse read
                them from ``sys.argv``.
        Returns: the argparse namespace, with ``opt`` replaced by a dict
        Raises:
            AssertionError: if ``--config`` was not supplied.
        """
        args = super(ArgsParser, self).parse_args(argv)
        assert args.config is not None, \
            "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        """
        Turn ``["a.b=1", "c=foo"]`` into ``{"a.b": 1, "c": "foo"}``.

        Args:
            opts (list|None): Raw ``key=value`` strings from ``--opt``.
        Returns: dict mapping each key to its YAML-decoded value
        Raises:
            ValueError: if an item contains no ``=``.
        """
        config = {}
        if not opts:
            return config
        for item in opts:
            # Split on the first '=' only, so values may contain '='
            # themselves (e.g. paths or URLs with query strings).
            key, sep, raw_value = item.strip().partition('=')
            if not sep:
                raise ValueError(
                    "Invalid option '{}': expected the form key=value".format(
                        item))
            # NOTE(review): yaml.Loader is the full loader and can construct
            # arbitrary Python objects. The input here is the command line;
            # do not reuse this on untrusted text.
            config[key] = yaml.load(raw_value, Loader=yaml.Loader)
        return config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_config(file_path):
    """
    Load config from yml/yaml file.

    Args:
        file_path (str): Path of the config file to be loaded.
    Returns: global config
    Raises:
        AssertionError: if the file extension is neither .yml nor .yaml.
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
    # Use a context manager so the file handle is closed even when the
    # YAML parser raises.
    with open(file_path, 'rb') as f:
        # NOTE(review): yaml.Loader is the full loader; only load trusted
        # configuration files with it.
        config = yaml.load(f, Loader=yaml.Loader)
    return config
|
2020-05-10 16:26:57 +08:00
|
|
|
|
|
|
|
|
|
|
2022-01-05 19:03:45 +08:00
|
|
|
|
def merge_config(config, opts):
    """
    Merge override options into the global config, in place.

    Args:
        config (dict): Global config to be updated.
        opts (dict): Overrides. A key without a dot replaces the top-level
            entry (or is merged into it when both the existing entry and
            the new value are dicts). A dotted key such as
            ``"Global.use_gpu"`` addresses a nested entry.
    Returns: global config
    Raises:
        AssertionError: if the first component of a dotted key is not a
            top-level key of ``config``.
    """
    for key, value in opts.items():
        if "." not in key:
            # Only merge when the existing entry is really a dict; an empty
            # YAML section is parsed as None and has no .update().
            if (isinstance(value, dict) and key in config and
                    isinstance(config[key], dict)):
                config[key].update(value)
            else:
                config[key] = value
            continue

        sub_keys = key.split('.')
        assert (
            sub_keys[0] in config
        ), "the sub_keys can only be one of global_config: {}, but get: " \
           "{}, please check your running command".format(
               config.keys(), sub_keys[0])
        # Walk down to the parent of the final component, then assign.
        cur = config[sub_keys[0]]
        for sub_key in sub_keys[1:-1]:
            cur = cur[sub_key]
        cur[sub_keys[-1]] = value
    return config
|
2020-05-10 16:26:57 +08:00
|
|
|
|
|
|
|
|
|
|
2022-05-23 22:14:56 +08:00
|
|
|
|
def check_device(use_gpu, use_xpu=False):
    """
    Print an error and exit when the requested device is not supported by
    the installed paddlepaddle build.

    Args:
        use_gpu (bool): Whether the config asks for GPU execution.
        use_xpu (bool): Whether the config asks for XPU execution.

    The process exits with status 1 when use_gpu is set but paddle is not
    compiled with CUDA, or when use_xpu is set but paddle is not compiled
    with XPU. Setting both flags only prints a message.
    """
    err = "Config {} cannot be set as true while your paddle " \
          "is not compiled with {} ! \nPlease try: \n" \
          "\t1. Install paddlepaddle to run model on {} \n" \
          "\t2. Set {} as false in config file to run " \
          "model on CPU"

    try:
        if use_gpu and use_xpu:
            print("use_xpu and use_gpu can not both be true.")
        if use_gpu and not paddle.is_compiled_with_cuda():
            print(err.format("use_gpu", "cuda", "gpu", "use_gpu"))
            sys.exit(1)
        if use_xpu and not paddle.device.is_compiled_with_xpu():
            print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
            sys.exit(1)
    except Exception as exc:
        # The check stays best-effort (sys.exit raises SystemExit, which is
        # not caught here), but a failing probe is reported rather than
        # silently ignored.
        print(
            "Warning: failed to check device support: {}".format(exc),
            file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
2022-02-23 16:31:16 +08:00
|
|
|
|
def check_xpu(use_xpu):
    """
    Print an error and exit when use_xpu=true is set with a paddlepaddle
    build that is not compiled with XPU support.

    Args:
        use_xpu (bool): Whether the config asks for XPU execution.
    """
    err = "Config use_xpu cannot be set as true while you are " \
          "using paddlepaddle cpu/gpu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-xpu to run model on XPU \n" \
          "\t2. Set use_xpu as false in config file to run " \
          "model on CPU/GPU"

    try:
        if use_xpu and not paddle.is_compiled_with_xpu():
            print(err)
            sys.exit(1)
    except Exception as exc:
        # Best-effort check: sys.exit raises SystemExit and is not caught
        # here; any other failure of the probe is reported, not hidden.
        print(
            "Warning: failed to check XPU support: {}".format(exc),
            file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
2020-10-13 17:13:33 +08:00
|
|
|
|
def train(config,
          train_dataloader,
          valid_dataloader,
          device,
          model,
          loss_class,
          optimizer,
          lr_scheduler,
          post_process_class,
          eval_class,
          pre_best_model_dict,
          logger,
          log_writer=None,
          scaler=None):
    """
    Run the training loop with periodic logging, evaluation and
    checkpointing.

    Args:
        config (dict): Global config; the 'Global', 'Architecture', 'Loss'
            and 'profiler_options' entries are read here.
        train_dataloader: Iterable of training batches. It is rebuilt at
            the start of an epoch when ``dataset.need_reset`` is true.
        valid_dataloader: Evaluation batches, forwarded to ``eval``.
        device: Passed to ``build_dataloader`` when the loader is rebuilt.
        model: Network; called on each batch.
        loss_class: Callable ``(preds, batch)`` returning a dict that
            contains a 'loss' entry.
        optimizer: Provides ``get_lr``, ``step`` and ``clear_grad``.
        lr_scheduler: Stepped once per iteration unless it is a float.
        post_process_class: Converts predictions for the metric.
        eval_class: Metric object (``main_indicator``, ``get_metric``).
        pre_best_model_dict (dict): Previously saved best metrics; may
            carry 'global_step' and 'start_epoch' for resuming.
        logger: Logger used for progress messages.
        log_writer: Optional metrics/model logger; used on rank 0 only.
        scaler: Optional AMP loss scaler; when set, the forward pass runs
            under ``paddle.amp.auto_cast``.
    """
    cal_metric_during_train = config['Global'].get('cal_metric_during_train',
                                                   False)
    calc_epoch_interval = config['Global'].get('calc_epoch_interval', 1)
    log_smooth_window = config['Global']['log_smooth_window']
    epoch_num = config['Global']['epoch_num']
    print_batch_step = config['Global']['print_batch_step']
    eval_batch_step = config['Global']['eval_batch_step']
    profiler_options = config['profiler_options']

    global_step = 0
    if 'global_step' in pre_best_model_dict:
        global_step = pre_best_model_dict['global_step']
    start_eval_step = 0
    # eval_batch_step may be given as [start_step, interval].
    if isinstance(eval_batch_step, list) and len(eval_batch_step) >= 2:
        start_eval_step = eval_batch_step[0]
        eval_batch_step = eval_batch_step[1]
        if len(valid_dataloader) == 0:
            logger.info(
                'No Images in eval dataset, evaluation during training '
                'will be disabled')
            # Larger than any reachable global_step, so the evaluation
            # condition below never becomes true.
            start_eval_step = 1e111
        logger.info(
            "During the training process, after the {}th iteration, "
            "an evaluation is run every {} iterations".format(
                start_eval_step, eval_batch_step))
    save_epoch_step = config['Global']['save_epoch_step']
    save_model_dir = config['Global']['save_model_dir']
    if not os.path.exists(save_model_dir):
        os.makedirs(save_model_dir)
    main_indicator = eval_class.main_indicator
    best_model_dict = {main_indicator: 0}
    best_model_dict.update(pre_best_model_dict)
    train_stats = TrainingStats(log_smooth_window, ['lr'])
    model_average = False
    model.train()

    algorithm = config['Architecture']['algorithm']
    use_srn = algorithm == "SRN"
    # Algorithms whose forward pass also receives the remaining batch items.
    extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
    extra_input = False
    if algorithm == 'Distillation':
        for key in config['Architecture']["Models"]:
            extra_input = extra_input or config['Architecture']['Models'][key][
                'algorithm'] in extra_input_models
    else:
        extra_input = algorithm in extra_input_models
    model_type = config['Architecture'].get('model_type', None)

    start_epoch = best_model_dict.get('start_epoch', 1)

    total_samples = 0
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    reader_start = time.time()
    eta_meter = AverageMeter()

    # On Windows the last batch of every epoch is skipped.
    max_iter = len(train_dataloader) - 1 if platform.system(
    ) == "Windows" else len(train_dataloader)

    for epoch in range(start_epoch, epoch_num + 1):
        if train_dataloader.dataset.need_reset:
            train_dataloader = build_dataloader(
                config, 'Train', device, logger, seed=epoch)
            max_iter = len(train_dataloader) - 1 if platform.system(
            ) == "Windows" else len(train_dataloader)
        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
                model_average = True

            # use amp
            if scaler:
                with paddle.amp.auto_cast():
                    if model_type == 'table' or extra_input:
                        preds = model(images, data=batch[1:])
                    elif model_type in ["kie", 'vqa']:
                        # Same dispatch as the non-AMP branch: these models
                        # consume the whole batch.
                        preds = model(batch)
                    else:
                        preds = model(images)
            else:
                if model_type == 'table' or extra_input:
                    preds = model(images, data=batch[1:])
                elif model_type in ["kie", 'vqa']:
                    preds = model(batch)
                else:
                    preds = model(images)

            loss = loss_class(preds, batch)
            avg_loss = loss['loss']

            if scaler:
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)
            else:
                avg_loss.backward()
                optimizer.step()
            optimizer.clear_grad()

            # only rec and cls need
            if cal_metric_during_train and epoch % calc_epoch_interval == 0:
                batch = [item.numpy() for item in batch]
                if model_type in ['table', 'kie']:
                    eval_class(preds, batch)
                else:
                    if config['Loss']['name'] in ['MultiLoss',
                                                  'MultiLoss_v2']:
                        # for multi head loss: use the CTC head output
                        post_result = post_process_class(preds['ctc'],
                                                         batch[1])
                    else:
                        post_result = post_process_class(preds, batch[1])
                    eval_class(post_result, batch)
                metric = eval_class.get_metric()
                train_stats.update(metric)

            train_batch_time = time.time() - reader_start
            train_batch_cost += train_batch_time
            eta_meter.update(train_batch_time)
            global_step += 1
            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            # logger and visualdl
            stats = {k: v.numpy().mean() for k, v in loss.items()}
            stats['lr'] = lr
            train_stats.update(stats)

            if log_writer is not None and dist.get_rank() == 0:
                log_writer.log_metrics(
                    metrics=train_stats.get(),
                    prefix="TRAIN",
                    step=global_step)

            if dist.get_rank() == 0 and (
                (global_step > 0 and global_step % print_batch_step == 0) or
                (idx >= len(train_dataloader) - 1)):
                logs = train_stats.log()

                # Remaining iterations (this and later epochs) times the
                # average batch time.
                eta_sec = ((epoch_num + 1 - epoch) * len(train_dataloader) -
                           idx - 1) * eta_meter.avg
                eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                strs = ('epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: '
                        '{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, '
                        'ips: {:.5f} samples/s, eta: {}').format(
                            epoch, epoch_num, global_step, logs,
                            train_reader_cost / print_batch_step,
                            train_batch_cost / print_batch_step,
                            total_samples / print_batch_step,
                            total_samples / train_batch_cost, eta_sec_format)
                logger.info(strs)

                # Restart the accumulation window for the next log line.
                total_samples = 0
                train_reader_cost = 0.0
                train_batch_cost = 0.0
            # eval
            if global_step > start_eval_step and \
                    (global_step - start_eval_step) % eval_batch_step == 0 \
                    and dist.get_rank() == 0:
                if model_average:
                    model_averager = paddle.incubate.optimizer.ModelAverage(
                        0.15,
                        parameters=model.parameters(),
                        min_average_window=10000,
                        max_average_window=15625)
                    model_averager.apply()
                cur_metric = eval(
                    model,
                    valid_dataloader,
                    post_process_class,
                    eval_class,
                    model_type,
                    extra_input=extra_input)
                cur_metric_str = 'cur metric, {}'.format(', '.join(
                    ['{}: {}'.format(k, v) for k, v in cur_metric.items()]))
                logger.info(cur_metric_str)

                # logger metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics=cur_metric, prefix="EVAL", step=global_step)

                if cur_metric[main_indicator] >= best_model_dict[
                        main_indicator]:
                    best_model_dict.update(cur_metric)
                    best_model_dict['best_epoch'] = epoch
                    save_model(
                        model,
                        optimizer,
                        save_model_dir,
                        logger,
                        config,
                        is_best=True,
                        prefix='best_accuracy',
                        best_model_dict=best_model_dict,
                        epoch=epoch,
                        global_step=global_step)
                best_str = 'best metric, {}'.format(', '.join([
                    '{}: {}'.format(k, v) for k, v in best_model_dict.items()
                ]))
                logger.info(best_str)
                # logger best metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics={
                            "best_{}".format(main_indicator):
                            best_model_dict[main_indicator]
                        },
                        prefix="EVAL",
                        step=global_step)

                    log_writer.log_model(
                        is_best=True,
                        prefix="best_accuracy",
                        metadata=best_model_dict)

            reader_start = time.time()
        if dist.get_rank() == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix='latest',
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step)

            if log_writer is not None:
                log_writer.log_model(is_best=False, prefix="latest")

        if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix='iter_epoch_{}'.format(epoch),
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step)
            if log_writer is not None:
                log_writer.log_model(
                    is_best=False, prefix='iter_epoch_{}'.format(epoch))

    best_str = 'best metric, {}'.format(', '.join(
        ['{}: {}'.format(k, v) for k, v in best_model_dict.items()]))
    logger.info(best_str)
    if dist.get_rank() == 0 and log_writer is not None:
        log_writer.close()
    return
|
|
|
|
|
|
|
|
|
|
|
2021-06-22 12:24:14 +08:00
|
|
|
|
def eval(model,
         valid_dataloader,
         post_process_class,
         eval_class,
         model_type=None,
         extra_input=False):
    """
    Evaluate ``model`` on ``valid_dataloader`` and return the metric dict.

    The model is switched to eval mode for the pass and back to train mode
    before returning.

    Args:
        model: Network to evaluate.
        valid_dataloader: Iterable of evaluation batches; ``batch[0]`` is
            passed to the model as the images.
        post_process_class: Converts raw predictions before they are fed
            to the metric (not used for 'table' and 'kie' models).
        eval_class: Metric accumulator; called per batch, then queried
            with ``get_metric()``.
        model_type (str|None): 'table', 'kie', 'vqa' or None; selects how
            the model and the metric are invoked.
        extra_input (bool): When true the model also receives
            ``batch[1:]`` through the ``data`` keyword.
    Returns: metric dict from ``eval_class.get_metric()`` with an extra
        'fps' entry (0.0 when no batch was timed)
    """
    model.eval()
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
        pbar = tqdm(
            total=len(valid_dataloader),
            desc='eval model:',
            position=0,
            leave=True)
        # On Windows the last batch is skipped.
        max_iter = len(valid_dataloader) - 1 if platform.system(
        ) == "Windows" else len(valid_dataloader)
        for idx, batch in enumerate(valid_dataloader):
            if idx >= max_iter:
                break
            images = batch[0]
            start = time.time()
            if model_type == 'table' or extra_input:
                preds = model(images, data=batch[1:])
            elif model_type in ["kie", 'vqa']:
                preds = model(batch)
            else:
                preds = model(images)

            # Convert tensors to numpy; other items are passed through.
            batch_numpy = [
                item.numpy() if isinstance(item, paddle.Tensor) else item
                for item in batch
            ]
            # The timed section covers the forward pass and the numpy
            # conversion, not post-processing.
            total_time += time.time() - start
            # Evaluate the results of the current batch
            if model_type in ['table', 'kie']:
                eval_class(preds, batch_numpy)
            elif model_type in ['vqa']:
                post_result = post_process_class(preds, batch_numpy)
                eval_class(post_result, batch_numpy)
            else:
                post_result = post_process_class(preds, batch_numpy[1])
                eval_class(post_result, batch_numpy)

            pbar.update(1)
            total_frame += len(images)
        # Get final metric, e.g. acc or hmean
        metric = eval_class.get_metric()

    pbar.close()
    model.train()
    # No batch processed (empty loader, or a single batch on Windows)
    # leaves total_time at 0; report 0 fps instead of dividing by zero.
    metric['fps'] = total_frame / total_time if total_time > 0 else 0.0
    return metric
|
2020-08-15 21:54:59 +08:00
|
|
|
|
|
2020-08-15 12:39:07 +08:00
|
|
|
|
|
2021-10-12 13:41:47 +08:00
|
|
|
|
def update_center(char_center, post_result, preds):
    """
    Fold the features of correctly recognised samples into the running
    per-class feature means.

    Args:
        char_center (dict): Maps a predicted class index to
            ``[mean_feature, count]``; updated in place.
        post_result (tuple): ``(result, label)``; a sample is used only
            when ``result[i][0] == label[i][0]``.
        preds (tuple): ``(feats, logits)`` tensors; the class index of
            each step is the argmax of ``logits`` over the last axis.
            # assumes both are indexed [sample][step] — TODO confirm
    Returns: the updated ``char_center`` dict
    """
    result, label = post_result
    feats, logits = preds
    class_ids = paddle.argmax(logits, axis=-1)
    feats = feats.numpy()
    class_ids = class_ids.numpy()

    for sample_idx, sample_label in enumerate(label):
        is_correct = result[sample_idx][0] == sample_label[0]
        if not is_correct:
            continue
        sample_feats = feats[sample_idx]
        for step, class_id in enumerate(class_ids[sample_idx]):
            if class_id not in char_center:
                # First occurrence: the feature itself is the mean.
                char_center[class_id] = [sample_feats[step], 1]
                continue
            # Incremental mean: (mean * n + x) / (n + 1)
            entry = char_center[class_id]
            entry[0] = (entry[0] * entry[1] + sample_feats[step]) / (
                entry[1] + 1)
            entry[1] += 1
    return char_center
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_center(model, eval_dataloader, post_process_class):
    """
    Compute the mean feature of every predicted class over a dataset.

    Args:
        model: Network; called with ``batch[0]``. Its output is passed to
            ``update_center`` together with the post-processed result.
        eval_dataloader: Iterable of batches.
        post_process_class: Callable ``(preds, batch[1])`` producing the
            result handed to ``update_center``.
    Returns: dict mapping class index to its mean feature
    """
    pbar = tqdm(total=len(eval_dataloader), desc='get center:')
    # On Windows the last batch is skipped.
    max_iter = len(eval_dataloader) - 1 if platform.system(
    ) == "Windows" else len(eval_dataloader)
    char_center = {}
    for idx, batch in enumerate(eval_dataloader):
        if idx >= max_iter:
            break
        images = batch[0]
        preds = model(images)

        batch = [item.numpy() for item in batch]
        # Obtain usable results from post-processing methods
        post_result = post_process_class(preds, batch[1])

        # update char_center
        char_center = update_center(char_center, post_result, preds)
        pbar.update(1)

    pbar.close()
    # Drop the counts and keep only the mean feature of each class.
    return {key: entry[0] for key, entry in char_center.items()}
|
|
|
|
|
|
|
|
|
|
|
2020-12-18 18:51:19 +08:00
|
|
|
|
def preprocess(is_train=False):
    """
    Parse the command line, build the merged config, select the device
    and create the loggers.

    Args:
        is_train (bool): When true the merged config is dumped to
            ``<save_model_dir>/config.yml`` and log output also goes to
            ``<save_model_dir>/train.log``.
    Returns: tuple ``(config, device, logger, log_writer)``;
        ``log_writer`` is None when neither visualdl nor wandb logging is
        enabled
    """
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    config = merge_config(config, FLAGS.opt)
    profile_dic = {"profiler_options": FLAGS.profiler_options}
    config = merge_config(config, profile_dic)

    if is_train:
        # save_config
        save_model_dir = config['Global']['save_model_dir']
        os.makedirs(save_model_dir, exist_ok=True)
        with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
            yaml.dump(
                dict(config), f, default_flow_style=False, sort_keys=False)
        log_file = '{}/train.log'.format(save_model_dir)
    else:
        log_file = None
    logger = get_logger(log_file=log_file)

    use_gpu = config['Global']['use_gpu']
    use_xpu = config['Global'].get('use_xpu', False)

    # check if set use_xpu=True in paddlepaddle cpu/gpu version
    check_xpu(use_xpu)

    alg = config['Architecture']['algorithm']
    assert alg in [
        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
        'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'PREN', 'FCE', 'SVTR'
    ]

    if use_xpu:
        device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
    else:
        device = 'gpu:{}'.format(dist.ParallelEnv()
                                 .dev_id) if use_gpu else 'cpu'
    # check if set use_gpu=True / use_xpu=True in an unsupported build
    check_device(use_gpu, use_xpu)

    device = paddle.set_device(device)

    config['Global']['distributed'] = dist.get_world_size() != 1

    loggers = []

    if config['Global'].get('use_visualdl'):
        save_model_dir = config['Global']['save_model_dir']
        loggers.append(VDLLogger(save_model_dir))
    if config['Global'].get('use_wandb') or 'wandb' in config:
        # Read the directory here: save_model_dir from the branches above
        # is not defined when is_train is False and visualdl is disabled.
        save_dir = config['Global']['save_model_dir']
        if "wandb" in config:
            wandb_params = config['wandb']
        else:
            wandb_params = dict()
        wandb_params.update({'save_dir': save_dir})
        loggers.append(WandbLogger(**wandb_params, config=config))
    print_dict(config, logger)

    log_writer = Loggers(loggers) if loggers else None

    logger.info('train with paddle {} and device {}'.format(paddle.__version__,
                                                            device))
    return config, device, logger, log_writer
|