Added functionality to use multiple loggers simultaneously and added the English documentation on how to use them

pull/5957/head
Manan Goel 2022-04-15 06:44:22 +00:00
parent dde60d69be
commit 56fccb03c8
8 changed files with 99 additions and 16 deletions

View File

@ -0,0 +1,54 @@
## Logging metrics and models
PaddleOCR comes with two metric logging tools integrated directly into the training API: [VisualDL](https://readthedocs.org/projects/visualdl/) and [Weights & Biases](https://docs.wandb.ai/).
### VisualDL
VisualDL is a visualization analysis tool of PaddlePaddle. The integration allows all training metrics to be logged to a VisualDL dashboard. To use it, add the following line to the `Global` section of the config yaml file -
```
Global:
  use_visualdl: True
```
To see the visualizations, run the following command in your terminal:
```shell
visualdl --logdir <save_model_dir>
```
Now open `localhost:8040` in your browser of choice!
### Weights & Biases
W&B is an MLOps tool that can be used for experiment tracking, dataset/model versioning, visualizing results and collaborating with colleagues. A W&B logger is integrated directly into PaddleOCR. To use it, first install the `wandb` SDK and log in to your wandb account.
```shell
pip install wandb
wandb login
```
If you do not have a wandb account, you can make one [here](https://wandb.ai/site).
To visualize and track your model training, add the following flag to your config yaml file under the `Global` section -
```
Global:
  use_wandb: True
```
To pass more arguments to the `WandbLogger` (listed [here](./config_en.md)), add the header `wandb` to the yaml file and add the arguments under it -
```
wandb:
  project: my_project
  entity: my_team
```
This will automatically log all the training and evaluation metrics to the W&B dashboard. Models are also logged at every model saving step and evaluation step, with the appropriate tags and metadata.
![W&B Dashboard](../imgs_en/wandb_metrics.png)
![W&B Models](../imgs_en/wandb_models.png)
A link to the dashboard is printed to the console at the beginning and end of every training job. You can also access the dashboard by logging into your W&B account in your browser.
### Using Multiple Loggers
Both VisualDL and W&B can also be used simultaneously by just setting both the aforementioned flags to True.

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

View File

@ -1,2 +1,3 @@
from .vdl_logger import VDLLogger
from .wandb_logger import WandbLogger
from .wandb_logger import WandbLogger
from .loggers import Loggers

View File

@ -0,0 +1,18 @@
from .wandb_logger import WandbLogger
class Loggers(object):
    """Fan-out wrapper that forwards each logging call to several loggers.

    Every wrapped object is expected to provide ``log_metrics``,
    ``log_model`` and ``close`` with the same signatures as the methods
    defined here.
    """

    def __init__(self, loggers):
        """Store the loggers to dispatch to.

        Args:
            loggers: Iterable of logger objects. It is copied into a list so
                that a one-shot iterable (e.g. a generator) can still be
                replayed on every subsequent call.
        """
        super().__init__()
        self.loggers = list(loggers)

    def log_metrics(self, metrics, prefix=None, step=None):
        """Forward ``metrics`` to every wrapped logger, in order.

        Args:
            metrics: Metrics object handed unchanged to each logger.
            prefix: Optional prefix handed unchanged to each logger.
            step: Optional step handed unchanged to each logger.
        """
        for logger in self.loggers:
            logger.log_metrics(metrics, prefix=prefix, step=step)

    def log_model(self, is_best, prefix, metadata=None):
        """Forward a model-logging request to every wrapped logger, in order.

        Args:
            is_best: Flag handed unchanged to each logger.
            prefix: Prefix handed unchanged to each logger.
            metadata: Optional metadata handed unchanged to each logger.
        """
        for logger in self.loggers:
            logger.log_model(is_best=is_best, prefix=prefix, metadata=metadata)

    def close(self):
        """Close every wrapped logger, even if some of them fail.

        Raises:
            Exception: The first exception raised by any logger's
                ``close``, re-raised after all loggers have been attempted.
        """
        first_error = None
        for logger in self.loggers:
            try:
                logger.close()
            except Exception as err:
                # Keep going so one failing backend does not leave the
                # remaining loggers open; the failure is reported below.
                if first_error is None:
                    first_error = err
        if first_error is not None:
            raise first_error

View File

@ -13,6 +13,9 @@ class VDLLogger(BaseLogger):
for k, v in updated_metrics.items():
self.vdl_writer.add_scalar(k, v, step)
def log_model(self, is_best, prefix, metadata=None):
    """Accept a model-logging request and do nothing with it.

    The arguments are ignored; the method exists so that callers can invoke
    ``log_model`` on this logger without checking its type first.
    """
    return None
def close(self):
    """Close the writer stored in ``self.vdl_writer``."""
    writer = self.vdl_writer
    writer.close()

View File

@ -59,8 +59,8 @@ class WandbLogger(BaseLogger):
def log_metrics(self, metrics, prefix=None, step=None):
    """Log a dictionary of metrics through ``self.run.log``.

    Args:
        metrics: Mapping of metric name to value.
        prefix: Optional namespace such as ``"EVAL"``. It is lower-cased and
            joined to each metric name with ``"/"``. When falsy, an empty
            prefix is used, so keys take the form ``"/<name>"``.
        step: Step value forwarded to ``self.run.log``.
    """
    if not prefix:
        prefix = ""
    # Only the lower-cased form is built; an earlier assignment using the
    # raw prefix was immediately overwritten and has been dropped.
    updated_metrics = {prefix.lower() + "/" + k: v for k, v in metrics.items()}
    self.run.log(updated_metrics, step=step)
def log_model(self, is_best, prefix, metadata=None):

View File

@ -31,7 +31,7 @@ from ppocr.utils.stats import TrainingStats
from ppocr.utils.save_load import save_model
from ppocr.utils.utility import print_dict, AverageMeter
from ppocr.utils.logging import get_logger
from ppocr.utils.loggers import VDLLogger, WandbLogger
from ppocr.utils.loggers import VDLLogger, WandbLogger, Loggers
from ppocr.utils import profiler
from ppocr.data import build_dataloader
@ -362,10 +362,9 @@ def train(config,
if log_writer is not None:
log_writer.log_metrics(metrics={
"best_{}".format(main_indicator): best_model_dict[main_indicator]
}, prefix="EVAL", step=global_step)
if isinstance(log_writer, WandbLogger):
log_writer.log_model(is_best=True, prefix="best_accuracy", metadata=best_model_dict)
}, prefix="EVAL", step=global_step)
log_writer.log_model(is_best=True, prefix="best_accuracy", metadata=best_model_dict)
reader_start = time.time()
if dist.get_rank() == 0:
@ -381,8 +380,7 @@ def train(config,
epoch=epoch,
global_step=global_step)
if isinstance(log_writer, WandbLogger):
log_writer.log_model(is_best=False, prefix="latest")
log_writer.log_model(is_best=False, prefix="latest")
if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
save_model(
@ -396,9 +394,8 @@ def train(config,
best_model_dict=best_model_dict,
epoch=epoch,
global_step=global_step)
if isinstance(log_writer, WandbLogger):
log_writer.log_model(is_best=False, prefix='iter_epoch_{}'.format(epoch))
log_writer.log_model(is_best=False, prefix='iter_epoch_{}'.format(epoch))
best_str = 'best metric, {}'.format(', '.join(
['{}: {}'.format(k, v) for k, v in best_model_dict.items()]))
@ -561,11 +558,14 @@ def preprocess(is_train=False):
config['Global']['distributed'] = dist.get_world_size() != 1
if "use_visualdl" in config['Global'] and config['Global']['use_visualdl'] and dist.get_rank() == 0:
loggers = []
if config['Global']['use_visualdl']:
save_model_dir = config['Global']['save_model_dir']
vdl_writer_path = '{}/vdl/'.format(save_model_dir)
log_writer = VDLLogger(save_model_dir)
elif ("use_wandb" in config['Global'] and config['Global']['use_wandb']) or "wandb" in config:
loggers.append(log_writer)
if config['Global']['use_wandb'] or 'wandb' in config:
save_dir = config['Global']['save_model_dir']
wandb_writer_path = "{}/wandb".format(save_dir)
if "wandb" in config:
@ -574,9 +574,16 @@ def preprocess(is_train=False):
wandb_params = dict()
wandb_params.update({'save_dir': save_model_dir})
log_writer = WandbLogger(**wandb_params, config=config)
loggers.append(log_writer)
else:
log_writer = None
print_dict(config, logger)
if loggers:
log_writer = Loggers(loggers)
else:
log_writer = None
logger.info('train with paddle {} and device {}'.format(paddle.__version__,
device))
return config, device, logger, log_writer