commit 68deaab13f

@@ -15,13 +15,12 @@
 import os
 import re
 import sys
-# import Polygon
 import shapely
 from shapely.geometry import Polygon
 import numpy as np
 from collections import defaultdict
 import operator
-import editdistance
+import Levenshtein
 import argparse
 import json
 import copy
@@ -95,7 +94,7 @@ def ed(args, str1, str2):
     if args.ignore_case:
         str1 = str1.lower()
         str2 = str2.lower()
-    return editdistance.eval(str1, str2)
+    return Levenshtein.distance(str1, str2)


 def convert_bbox_to_polygon(bbox):
@@ -115,8 +114,6 @@ def eval_e2e(args):
     # pred
     dt_results = parse_ser_results_fp(args.pred_json_path, "pred",
                                       args.ignore_background)
-    assert set(gt_results.keys()) == set(dt_results.keys())
-
     iou_thresh = args.iou_thres
     num_gt_chars = 0
     gt_count = 0
@@ -124,7 +121,7 @@ def eval_e2e(args):
     hit = 0
     ed_sum = 0

-    for img_name in gt_results:
+    for img_name in dt_results:
         gt_info = gt_results[img_name]
         gt_count += len(gt_info)

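Side note on the import swap above: `editdistance.eval` and `Levenshtein.distance` both compute the plain Levenshtein edit distance, so `ed()` should return the same values after the change. A minimal sanity check, assuming both packages are installed (`pip install editdistance python-Levenshtein`); the helper names are illustrative and not part of the commit:

    # Compare the old and new edit-distance backends on a few string pairs.
    import editdistance
    import Levenshtein

    def ed_old(str1, str2):
        return editdistance.eval(str1, str2)

    def ed_new(str1, str2):
        return Levenshtein.distance(str1, str2)

    for a, b in [("kitten", "sitting"), ("flaw", "lawn"), ("", "abc")]:
        assert ed_old(a, b) == ed_new(a, b)
        print(a, b, ed_new(a, b))  # prints 3, 2 and 3 respectively
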
@@ -36,6 +36,9 @@ from ppocr.utils.logging import get_logger

 def train(args):
     logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
+    rank = paddle.distributed.get_rank()
+    distributed = paddle.distributed.get_world_size() > 1
+
     print_arguments(args, logger)

     # Added here for reproducibility (even between python 2 and 3)
@@ -45,7 +48,7 @@ def train(args):
     pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index

     # dist mode
-    if paddle.distributed.get_world_size() > 1:
+    if distributed:
         paddle.distributed.init_parallel_env()

     tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
@@ -59,8 +62,8 @@ def train(args):
         args.model_name_or_path)

     # dist mode
-    if paddle.distributed.get_world_size() > 1:
-        model = paddle.distributed.DataParallel(model)
+    if distributed:
+        model = paddle.DataParallel(model)

     train_dataset = XFUNDataset(
         tokenizer,
@@ -90,8 +93,7 @@ def train(args):

     train_sampler = paddle.io.DistributedBatchSampler(
         train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)
-    args.train_batch_size = args.per_gpu_train_batch_size * \
-        max(1, paddle.distributed.get_world_size())
+
     train_dataloader = paddle.io.DataLoader(
         train_dataset,
         batch_sampler=train_sampler,
@@ -136,7 +138,8 @@ def train(args):
         args.per_gpu_train_batch_size))
     logger.info(
         " Total train batch size (w. parallel, distributed & accumulation) = {}".
-        format(args.train_batch_size * paddle.distributed.get_world_size()))
+        format(args.per_gpu_train_batch_size *
+               paddle.distributed.get_world_size()))
     logger.info(" Total optimization steps = {}".format(t_total))

     global_step = 0
@@ -170,7 +173,7 @@ def train(args):
             global_step += 1
             total_samples += batch['image'].shape[0]

-            if step % print_step == 0:
+            if rank == 0 and step % print_step == 0:
                 logger.info(
                     "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                     format(epoch, args.num_train_epochs, step,
@@ -185,38 +188,38 @@ def train(args):
             train_run_cost = 0.0
             total_samples = 0

-            if (paddle.distributed.get_rank() == 0 and args.eval_steps > 0 and
-                    global_step % args.eval_steps == 0):
+            if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training:
                 # Log metrics
-                if (paddle.distributed.get_rank() == 0 and args.
-                        evaluate_during_training):  # Only evaluate when single GPU otherwise metrics may not average well
-                    results = evaluate(model, eval_dataloader, logger)
-                    if results['f1'] >= best_metirc['f1']:
-                        best_metirc = results
-                        output_dir = os.path.join(args.output_dir, "best_model")
-                        os.makedirs(output_dir, exist_ok=True)
+                # Only evaluate when single GPU otherwise metrics may not average well
+                results = evaluate(model, eval_dataloader, logger)
+                if results['f1'] >= best_metirc['f1']:
+                    best_metirc = results
+                    output_dir = os.path.join(args.output_dir, "best_model")
+                    os.makedirs(output_dir, exist_ok=True)
+                    if distributed:
+                        model._layers.save_pretrained(output_dir)
+                    else:
                         model.save_pretrained(output_dir)
-                        tokenizer.save_pretrained(output_dir)
-                        paddle.save(args,
-                                    os.path.join(output_dir,
-                                                 "training_args.bin"))
-                        logger.info("Saving model checkpoint to {}".format(
-                            output_dir))
-                    logger.info("eval results: {}".format(results))
-                    logger.info("best_metirc: {}".format(best_metirc))

-            if paddle.distributed.get_rank() == 0:
-                # Save model checkpoint
-                output_dir = os.path.join(args.output_dir, "latest_model")
-                os.makedirs(output_dir, exist_ok=True)
-                if paddle.distributed.get_rank() == 0:
-                    model.save_pretrained(output_dir)
                     tokenizer.save_pretrained(output_dir)
                     paddle.save(args,
                                 os.path.join(output_dir, "training_args.bin"))
                     logger.info("Saving model checkpoint to {}".format(
                         output_dir))
+                logger.info("eval results: {}".format(results))
+                logger.info("best_metirc: {}".format(best_metirc))
             reader_start = time.time()

+        if rank == 0:
+            # Save model checkpoint
+            output_dir = os.path.join(args.output_dir, "latest_model")
+            os.makedirs(output_dir, exist_ok=True)
+            if distributed:
+                model._layers.save_pretrained(output_dir)
+            else:
+                model.save_pretrained(output_dir)
+            tokenizer.save_pretrained(output_dir)
+            paddle.save(args, os.path.join(output_dir, "training_args.bin"))
+            logger.info("Saving model checkpoint to {}".format(output_dir))
     logger.info("best_metirc: {}".format(best_metirc))

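A condensed sketch of the checkpointing pattern both training scripts converge on: wrap the model with `paddle.DataParallel` only when the world size is greater than one, let rank 0 do all disk writes, and unwrap the parallel model through its `_layers` attribute before saving. The commit saves with `save_pretrained`; the sketch below uses a plain `paddle.save` of the state dict instead, and the helper names and output path are illustrative, not from the commit:

    import os
    import paddle

    def wrap_if_distributed(model):
        # Initialize the parallel environment and wrap the model only
        # when more than one process is running.
        distributed = paddle.distributed.get_world_size() > 1
        if distributed:
            paddle.distributed.init_parallel_env()
            model = paddle.DataParallel(model)
        return model, distributed

    def save_on_rank0(model, output_dir, distributed):
        # Only one process should touch the filesystem.
        if paddle.distributed.get_rank() != 0:
            return
        os.makedirs(output_dir, exist_ok=True)
        # paddle.DataParallel keeps the original layer in `_layers`;
        # saving the unwrapped layer keeps the checkpoint free of any
        # DataParallel-specific structure.
        layer = model._layers if distributed else model
        paddle.save(layer.state_dict(),
                    os.path.join(output_dir, "model.pdparams"))
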
@@ -37,6 +37,9 @@ from ppocr.utils.logging import get_logger

 def train(args):
     os.makedirs(args.output_dir, exist_ok=True)
+    rank = paddle.distributed.get_rank()
+    distributed = paddle.distributed.get_world_size() > 1
+
     logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
     print_arguments(args, logger)

@@ -44,7 +47,7 @@ def train(args):
     pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index

     # dist mode
-    if paddle.distributed.get_world_size() > 1:
+    if distributed:
         paddle.distributed.init_parallel_env()

     tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
@@ -59,7 +62,7 @@ def train(args):
         args.model_name_or_path)

     # dist mode
-    if paddle.distributed.get_world_size() > 1:
+    if distributed:
         model = paddle.DataParallel(model)

     train_dataset = XFUNDataset(
@@ -88,9 +91,6 @@ def train(args):
     train_sampler = paddle.io.DistributedBatchSampler(
         train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)

-    args.train_batch_size = args.per_gpu_train_batch_size * max(
-        1, paddle.distributed.get_world_size())
-
     train_dataloader = paddle.io.DataLoader(
         train_dataset,
         batch_sampler=train_sampler,
@@ -134,7 +134,7 @@ def train(args):
         args.per_gpu_train_batch_size)
     logger.info(
         " Total train batch size (w. parallel, distributed) = %d",
-        args.train_batch_size * paddle.distributed.get_world_size(), )
+        args.per_gpu_train_batch_size * paddle.distributed.get_world_size(), )
     logger.info(" Total optimization steps = %d", t_total)

     global_step = 0
@@ -168,7 +168,7 @@ def train(args):
             global_step += 1
             total_samples += batch['image'].shape[0]

-            if step % print_step == 0:
+            if rank == 0 and step % print_step == 0:
                 logger.info(
                     "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                     format(epoch_id, args.num_train_epochs, step,
@@ -183,47 +183,43 @@ def train(args):
             train_run_cost = 0.0
             total_samples = 0

-            if (paddle.distributed.get_rank() == 0 and args.eval_steps > 0 and
-                    global_step % args.eval_steps == 0):
+            if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training:
                 # Log metrics
                 # Only evaluate when single GPU otherwise metrics may not average well
-                if paddle.distributed.get_rank(
-                ) == 0 and args.evaluate_during_training:
-                    results, _ = evaluate(
-                        args, model, tokenizer, eval_dataloader, label2id_map,
-                        id2label_map, pad_token_label_id, logger)
+                results, _ = evaluate(args, model, tokenizer, eval_dataloader,
+                                      label2id_map, id2label_map,
+                                      pad_token_label_id, logger)

-                    if best_metrics is None or results["f1"] >= best_metrics[
-                            "f1"]:
-                        best_metrics = copy.deepcopy(results)
-                        output_dir = os.path.join(args.output_dir, "best_model")
-                        os.makedirs(output_dir, exist_ok=True)
-                        if paddle.distributed.get_rank() == 0:
-                            model.save_pretrained(output_dir)
-                            tokenizer.save_pretrained(output_dir)
-                            paddle.save(
-                                args,
-                                os.path.join(output_dir, "training_args.bin"))
-                            logger.info("Saving model checkpoint to %s",
-                                        output_dir)
+                if best_metrics is None or results["f1"] >= best_metrics["f1"]:
+                    best_metrics = copy.deepcopy(results)
+                    output_dir = os.path.join(args.output_dir, "best_model")
+                    os.makedirs(output_dir, exist_ok=True)
+                    if distributed:
+                        model._layers.save_pretrained(output_dir)
+                    else:
+                        model.save_pretrained(output_dir)

-                    logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format(
-                        epoch_id, args.num_train_epochs, step,
-                        len(train_dataloader), results))
-                    if best_metrics is not None:
-                        logger.info("best metrics: {}".format(best_metrics))

-            if paddle.distributed.get_rank() == 0:
-                # Save model checkpoint
-                output_dir = os.path.join(args.output_dir, "latest_model")
-                os.makedirs(output_dir, exist_ok=True)
-                if paddle.distributed.get_rank() == 0:
-                    model.save_pretrained(output_dir)
                     tokenizer.save_pretrained(output_dir)
                     paddle.save(args,
                                 os.path.join(output_dir, "training_args.bin"))
                     logger.info("Saving model checkpoint to %s", output_dir)

+                logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format(
+                    epoch_id, args.num_train_epochs, step,
+                    len(train_dataloader), results))
+                if best_metrics is not None:
+                    logger.info("best metrics: {}".format(best_metrics))
             reader_start = time.time()

+        if rank == 0:
+            # Save model checkpoint
+            output_dir = os.path.join(args.output_dir, "latest_model")
+            os.makedirs(output_dir, exist_ok=True)
+            if distributed:
+                model._layers.save_pretrained(output_dir)
+            else:
+                model.save_pretrained(output_dir)
+            tokenizer.save_pretrained(output_dir)
+            paddle.save(args, os.path.join(output_dir, "training_args.bin"))
+            logger.info("Saving model checkpoint to %s", output_dir)
     return global_step, tr_loss / global_step

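One more note on the sampler hunks: as far as these hunks show, the deleted `args.train_batch_size` lines only fed the total-batch-size log message, and multiplying that value by the world size again over-counted; the corrected log multiplies the per-GPU batch size by the world size once, since `paddle.io.DistributedBatchSampler` already works in per-GPU batches. A small sketch under that assumption (`build_loader` is an illustrative helper, not from the commit):

    import paddle

    def build_loader(train_dataset, per_gpu_train_batch_size):
        # Each process draws `per_gpu_train_batch_size` samples per step.
        train_sampler = paddle.io.DistributedBatchSampler(
            train_dataset, batch_size=per_gpu_train_batch_size, shuffle=True)
        train_dataloader = paddle.io.DataLoader(
            train_dataset, batch_sampler=train_sampler)

        # What the fixed logging line reports: per-GPU batch size * world size.
        global_batch_size = (per_gpu_train_batch_size *
                             paddle.distributed.get_world_size())
        print(" Total train batch size (w. parallel, distributed) = %d" %
              global_batch_size)
        return train_dataloader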