Control print memory info (#3079)

* support controlling memory info printing

* fix log printing bug
changdazhou 2024-03-05 11:28:23 +08:00 committed by GitHub
parent 54767fdda4
commit d189384ab8
1 changed file with 8 additions and 4 deletions


@@ -62,11 +62,15 @@ def log_info(trainer, batch_size, epoch_id, iter_id):
     eta_msg = "eta: {:s}".format(str(datetime.timedelta(seconds=int(eta_sec))))
     max_mem_reserved_msg = ""
     max_mem_allocated_msg = ""
-    if paddle.device.is_compiled_with_cuda():
-        max_mem_reserved_msg = f"max_mem_reserved: {format(paddle.device.cuda.max_memory_reserved() / (1024 ** 2), '.2f')} MB"
-        max_mem_allocated_msg = f"max_mem_allocated: {format(paddle.device.cuda.max_memory_allocated() / (1024 ** 2), '.2f')} MB"
+    max_mem_msg = ""
+    print_mem_info = trainer.config["Global"].get("print_mem_info", False)
+    if print_mem_info:
+        if paddle.device.is_compiled_with_cuda():
+            max_mem_reserved_msg = f"max_mem_reserved: {format(paddle.device.cuda.max_memory_reserved() / (1024 ** 2), '.2f')} MB"
+            max_mem_allocated_msg = f"max_mem_allocated: {format(paddle.device.cuda.max_memory_allocated() / (1024 ** 2), '.2f')} MB"
+            max_mem_msg = f", {max_mem_reserved_msg}, {max_mem_allocated_msg}"
     logger.info(
-        f"[Train][Epoch {epoch_id}/{global_epochs}][Iter: {iter_id}/{trainer.iter_per_epoch}]{lr_msg}, {metric_msg}, {time_msg}, {ips_msg}, {eta_msg}, {max_mem_reserved_msg}, {max_mem_allocated_msg}"
+        f"[Train][Epoch {epoch_id}/{global_epochs}][Iter: {iter_id}/{trainer.iter_per_epoch}]{lr_msg}, {metric_msg}, {time_msg}, {ips_msg}, {eta_msg}{max_mem_msg}"
     )
 
     for i, lr in enumerate(trainer.lr_sch):
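
For reference, below is a minimal standalone sketch of the gating logic this commit introduces. A plain dict stands in for trainer.config, and the final print stands in for logger.info with made-up log values; the "print_mem_info" key and its False default are taken from the diff above, and in a real PaddleClas run the flag would be set under the Global section of the training config.

# Sketch of the new gating logic (stand-in config and log values, see above).
import paddle

config = {"Global": {"print_mem_info": True}}  # stand-in for trainer.config

max_mem_msg = ""
if config["Global"].get("print_mem_info", False):
    if paddle.device.is_compiled_with_cuda():
        reserved_mb = paddle.device.cuda.max_memory_reserved() / (1024 ** 2)
        allocated_mb = paddle.device.cuda.max_memory_allocated() / (1024 ** 2)
        max_mem_msg = (
            f", max_mem_reserved: {reserved_mb:.2f} MB"
            f", max_mem_allocated: {allocated_mb:.2f} MB"
        )

# When the flag is off (or CUDA is unavailable), max_mem_msg stays empty,
# so the log line no longer ends with the dangling ", , " that the old
# code produced on CPU-only builds.
print(f"[Train][Epoch 1/20][Iter: 0/100] loss: 0.12345{max_mem_msg}")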