support benchmark for paddlepaddle3.0 (#13574)

pull/13581/head
changdazhou 2024-08-02 19:24:40 +08:00 committed by GitHub
parent d69bf81907
commit b6211b936b
4 changed files with 22 additions and 16 deletions


@@ -2,8 +2,6 @@ import os
 from .base_logger import BaseLogger
 from ppocr.utils.logging import get_logger
 
-logger = get_logger()
-
 
 class WandbLogger(BaseLogger):
     def __init__(
@@ -40,6 +38,7 @@ class WandbLogger(BaseLogger):
             resume="allow",
         )
         self._wandb_init.update(**kwargs)
+        self.logger = get_logger()
 
         _ = self.run
 
@@ -50,7 +49,7 @@ class WandbLogger(BaseLogger):
     def run(self):
        if self._run is None:
            if self.wandb.run is not None:
-                logger.info(
+                self.logger.info(
                    "There is a wandb run already in progress "
                    "and newly created instances of `WandbLogger` will reuse"
                    " this run. If this is not desired, call `wandb.finish()`"

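The net effect of these three hunks: the module-level logger created at import time is replaced by a per-instance self.logger fetched in __init__, so WandbLogger uses whatever the shared "ppocr" logger has been configured to by the time the object is built. A minimal sketch of the resulting pattern (illustrative only, the real constructor takes many more arguments):

from ppocr.utils.logging import get_logger


class LoggerOwningSketch:
    """Illustrative stand-in for how WandbLogger now holds its logger."""

    def __init__(self, **kwargs):
        # Fetched when the instance is created, not when the module is
        # imported, so earlier logger configuration is already in place.
        self.logger = get_logger()

    def note_existing_run(self):
        # Abridged version of the message the real class logs when a
        # wandb run is already in progress.
        self.logger.info("Reusing the wandb run that is already in progress.")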

@@ -26,7 +26,7 @@ logger_initialized = {}
 
 
 @functools.lru_cache()
-def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
+def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG, log_ranks="0"):
     """Initialize and get a logger by name.
     If the logger has not been initialized, this method will initialize the
     logger by adding one or two handlers, otherwise the initialized logger will
@@ -39,6 +39,7 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         log_level (int): The logger level. Note that only the process of
             rank 0 is affected, and other processes will set the level to
             "Error" thus be silent most of the time.
+        log_ranks (str): Comma-separated ids of the gpu ranks that should log, "0" by default.
     Returns:
         logging.Logger: The expected logger.
     """
@@ -62,7 +63,13 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         file_handler = logging.FileHandler(log_file, "a")
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-    if dist.get_rank() == 0:
+
+    if isinstance(log_ranks, str):
+        log_ranks = [int(i) for i in log_ranks.split(",")]
+    elif isinstance(log_ranks, int):
+        log_ranks = [log_ranks]
+
+    if dist.get_rank() in log_ranks:
         logger.setLevel(log_level)
     else:
         logger.setLevel(logging.ERROR)
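A short sketch of how the new parameter behaves from the caller's side (the file path and rank ids below are illustrative, not from the commit):

from ppocr.utils.logging import get_logger

# "0,1" is parsed into [0, 1]: processes whose dist.get_rank() is in that
# list keep log_level (DEBUG by default), every other rank is switched to
# ERROR and stays quiet. A plain int such as log_ranks=1 is wrapped as [1].
logger = get_logger(log_file="./output/train.log", log_ranks="0,1")
logger.info("visible on ranks 0 and 1 only")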


@@ -26,9 +26,6 @@ import random
 from ppocr.utils.logging import get_logger
 
-logger = get_logger()
-
-
 def str2bool(v):
     return v.lower() in ("true", "yes", "t", "y", "1")
 
 
@@ -340,6 +337,7 @@ def get_infer_gpuid():
     Returns:
         int: The GPU ID to be used for inference.
     """
+    logger = get_logger()
     if not paddle.device.is_compiled_with_rocm:
         gpu_id_str = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
     else:

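Same pattern here: the import-time logger = get_logger() is dropped and get_infer_gpuid() fetches the logger when it runs. One plausible motivation (inferred, not stated in the commit) is that an import-time call would initialize the shared "ppocr" logger before preprocess() can pass log_file and log_ranks; deferring the call keeps configuration in one place, and because get_logger() is cached the deferred call is cheap:

from ppocr.utils.logging import get_logger

# get_logger() is wrapped in functools.lru_cache() and tracks initialized
# names in logger_initialized, so repeated calls with the same arguments
# return the one shared "ppocr" logger rather than building a new one.
logger_a = get_logger()
logger_b = get_logger()
assert logger_a is logger_b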

@@ -204,6 +204,7 @@ def train(
     eval_batch_step = config["Global"]["eval_batch_step"]
     eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
     profiler_options = config["profiler_options"]
+    print_mem_info = config["Global"].get("print_mem_info", True)
 
     global_step = 0
     if "global_step" in pre_best_model_dict:
@@ -406,9 +407,8 @@ def train(
                     metrics=train_stats.get(), prefix="TRAIN", step=global_step
                 )
 
-            if dist.get_rank() == 0 and (
-                (global_step > 0 and global_step % print_batch_step == 0)
-                or (idx >= len(train_dataloader) - 1)
+            if (global_step > 0 and global_step % print_batch_step == 0) or (
+                idx >= len(train_dataloader) - 1
             ):
                 logs = train_stats.log()
 
@@ -418,13 +418,13 @@ def train(
                 eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                 max_mem_reserved_str = ""
                 max_mem_allocated_str = ""
-                if paddle.device.is_compiled_with_cuda():
-                    max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
-                    max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+                if paddle.device.is_compiled_with_cuda() and print_mem_info:
+                    max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
+                    max_mem_allocated_str = f" max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
                 strs = (
                     "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
                     "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
-                    "ips: {:.5f} samples/s, eta: {}, {} {}".format(
+                    "ips: {:.5f} samples/s, eta: {}{}{}".format(
                         epoch,
                         epoch_num,
                         global_step,
@@ -740,7 +740,9 @@ def preprocess(is_train=False):
         log_file = "{}/train.log".format(save_model_dir)
     else:
         log_file = None
-    logger = get_logger(log_file=log_file)
+    log_ranks = config["Global"].get("log_ranks", "0")
+    logger = get_logger(log_file=log_file, log_ranks=log_ranks)
+
     # check if set use_gpu=True in paddlepaddle cpu version
     use_gpu = config["Global"].get("use_gpu", False)
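Both new knobs live under the Global section of the training config and are read with .get(), so existing configs keep the old behaviour (only rank 0 logs, memory stats printed). A sketch of how train() and preprocess() consume them, with illustrative values that are not part of the commit:

# Illustrative config dict mirroring the Global entries read above.
config = {
    "Global": {
        "log_ranks": "0,1",       # ranks 0 and 1 write the training log
        "print_mem_info": False,  # omit max_mem_reserved/allocated from log lines
    }
}

log_ranks = config["Global"].get("log_ranks", "0")             # -> "0,1"
print_mem_info = config["Global"].get("print_mem_info", True)  # -> False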